From ab09587740fddf6b4116be7b6716ab47f34d2634 Mon Sep 17 00:00:00 2001
From: Eliad Peller <eliad@wizery.com>
Date: Fri, 27 Jul 2012 12:33:22 +0300
Subject: mac80211: add PS flag to bss_conf

Currently, ps mode is indicated per device (rather than
per interface), which doesn't make a lot of sense.

Moreover, there are subtle bugs caused by the inability
to indicate ps change along with other changes
(e.g. when the AP deauth us, we'd like to indicate
CHANGED_PS | CHANGED_ASSOC, as changing PS before
notifying about disassociation will result in null-packets
being sent (if IEEE80211_HW_SUPPORTS_DYNAMIC_PS) while
the sta is already disconnected.)

Keep the current per-device notifications, and add
parallel per-vif notifications.

In order to keep it simple, the per-device ps and
the per-vif ps are orthogonal - the per-vif ps
configuration is determined only by the user
configuration (enable/disable) and the connection
state, and is not affected by other vifs state and
(temporary) dynamic_ps/offchannel operations
(unlike per-device ps).

Signed-off-by: Eliad Peller <eliad@wizery.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/mac80211.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index bb86aa6f98dd..d67d3bbe21c1 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -171,6 +171,7 @@ struct ieee80211_low_level_stats {
  * @BSS_CHANGED_IDLE: Idle changed for this BSS/interface.
  * @BSS_CHANGED_SSID: SSID changed for this BSS (AP mode)
  * @BSS_CHANGED_AP_PROBE_RESP: Probe Response changed for this BSS (AP mode)
+ * @BSS_CHANGED_PS: PS changed for this BSS (STA mode)
  */
 enum ieee80211_bss_change {
 	BSS_CHANGED_ASSOC		= 1<<0,
@@ -190,6 +191,7 @@ enum ieee80211_bss_change {
 	BSS_CHANGED_IDLE		= 1<<14,
 	BSS_CHANGED_SSID		= 1<<15,
 	BSS_CHANGED_AP_PROBE_RESP	= 1<<16,
+	BSS_CHANGED_PS			= 1<<17,
 
 	/* when adding here, make sure to change ieee80211_reconfig */
 };
@@ -266,6 +268,8 @@ enum ieee80211_rssi_event {
  * @idle: This interface is idle. There's also a global idle flag in the
  *	hardware config which may be more appropriate depending on what
  *	your driver/device needs to do.
+ * @ps: power-save mode (STA only). This flag is NOT affected by
+ *	offchannel/dynamic_ps operations.
  * @ssid: The SSID of the current vif. Only valid in AP-mode.
  * @ssid_len: Length of SSID given in @ssid.
  * @hidden_ssid: The SSID of the current vif is hidden. Only valid in AP-mode.
@@ -296,6 +300,7 @@ struct ieee80211_bss_conf {
 	bool arp_filter_enabled;
 	bool qos;
 	bool idle;
+	bool ps;
 	u8 ssid[IEEE80211_MAX_SSID_LEN];
 	size_t ssid_len;
 	bool hidden_ssid;
-- 
cgit v1.2.3


From 36323f817af0376c78612cfdab714b0feb05fea5 Mon Sep 17 00:00:00 2001
From: Thomas Huehn <thomas@net.t-labs.tu-berlin.de>
Date: Mon, 23 Jul 2012 21:33:42 +0200
Subject: mac80211: move TX station pointer and restructure TX

Remove the control.sta pointer from ieee80211_tx_info to free up
sufficient space in the TX skb control buffer for the upcoming
Transmit Power Control (TPC).
Instead, the pointer is now on the stack in a new control struct
that is passed as a function parameter to the drivers' tx method.

Signed-off-by: Thomas Huehn <thomas@net.t-labs.tu-berlin.de>
Signed-off-by: Alina Friedrichsen <x-alina@gmx.net>
Signed-off-by: Felix Fietkau <nbd@openwrt.org>
[reworded commit message]
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 drivers/net/wireless/adm8211.c                     |  4 +++-
 drivers/net/wireless/at76c50x-usb.c                |  4 +++-
 drivers/net/wireless/ath/ath5k/mac80211-ops.c      |  3 ++-
 drivers/net/wireless/ath/ath9k/ath9k.h             |  1 +
 drivers/net/wireless/ath/ath9k/htc.h               |  1 +
 drivers/net/wireless/ath/ath9k/htc_drv_beacon.c    |  2 +-
 drivers/net/wireless/ath/ath9k/htc_drv_main.c      |  6 +++--
 drivers/net/wireless/ath/ath9k/htc_drv_txrx.c      |  2 +-
 drivers/net/wireless/ath/ath9k/main.c              |  5 ++++-
 drivers/net/wireless/ath/ath9k/xmit.c              |  9 ++++----
 drivers/net/wireless/ath/carl9170/carl9170.h       |  4 +++-
 drivers/net/wireless/ath/carl9170/tx.c             | 16 ++++++-------
 drivers/net/wireless/b43/main.c                    |  3 ++-
 drivers/net/wireless/b43legacy/main.c              |  1 +
 .../net/wireless/brcm80211/brcmsmac/mac80211_if.c  |  6 +++--
 drivers/net/wireless/iwlegacy/3945-mac.c           | 12 ++++++----
 drivers/net/wireless/iwlegacy/4965-mac.c           | 26 +++++++++++++---------
 drivers/net/wireless/iwlegacy/4965.h               |  8 +++++--
 drivers/net/wireless/iwlwifi/dvm/agn.h             |  4 +++-
 drivers/net/wireless/iwlwifi/dvm/mac80211.c        |  6 +++--
 drivers/net/wireless/iwlwifi/dvm/tx.c              | 16 +++++++------
 drivers/net/wireless/libertas_tf/main.c            |  4 +++-
 drivers/net/wireless/mac80211_hwsim.c              |  8 ++++---
 drivers/net/wireless/mwl8k.c                       | 17 ++++++++------
 drivers/net/wireless/p54/lmac.h                    |  4 +++-
 drivers/net/wireless/p54/main.c                    |  2 +-
 drivers/net/wireless/p54/txrx.c                    | 15 ++++++++-----
 drivers/net/wireless/rt2x00/rt2x00.h               |  4 +++-
 drivers/net/wireless/rt2x00/rt2x00dev.c            |  2 +-
 drivers/net/wireless/rt2x00/rt2x00mac.c            |  4 +++-
 drivers/net/wireless/rt2x00/rt2x00queue.c          | 20 +++++++++--------
 drivers/net/wireless/rtl818x/rtl8180/dev.c         |  6 +++--
 drivers/net/wireless/rtl818x/rtl8187/dev.c         |  6 +++--
 drivers/net/wireless/rtlwifi/base.c                |  3 +--
 drivers/net/wireless/rtlwifi/core.c                |  8 ++++---
 drivers/net/wireless/rtlwifi/pci.c                 | 16 ++++++-------
 drivers/net/wireless/rtlwifi/rtl8192ce/trx.c       |  5 +++--
 drivers/net/wireless/rtlwifi/rtl8192ce/trx.h       |  1 +
 drivers/net/wireless/rtlwifi/rtl8192cu/trx.c       |  5 +++--
 drivers/net/wireless/rtlwifi/rtl8192cu/trx.h       |  4 +++-
 drivers/net/wireless/rtlwifi/rtl8192de/trx.c       |  5 +++--
 drivers/net/wireless/rtlwifi/rtl8192de/trx.h       |  1 +
 drivers/net/wireless/rtlwifi/rtl8192se/trx.c       |  5 +++--
 drivers/net/wireless/rtlwifi/rtl8192se/trx.h       |  1 +
 drivers/net/wireless/rtlwifi/usb.c                 | 15 ++++++++-----
 drivers/net/wireless/rtlwifi/wifi.h                | 13 +++++++----
 drivers/net/wireless/ti/wl1251/main.c              |  4 +++-
 drivers/net/wireless/ti/wlcore/main.c              |  6 +++--
 drivers/net/wireless/zd1211rw/zd_mac.c             |  6 +++--
 drivers/staging/winbond/wbusb.c                    |  4 +++-
 include/net/mac80211.h                             | 20 ++++++++++++-----
 net/mac80211/driver-ops.h                          |  6 +++--
 net/mac80211/tx.c                                  |  5 +++--
 53 files changed, 232 insertions(+), 132 deletions(-)

(limited to 'include')

diff --git a/drivers/net/wireless/adm8211.c b/drivers/net/wireless/adm8211.c
index 689a71c1af71..154a4965be4f 100644
--- a/drivers/net/wireless/adm8211.c
+++ b/drivers/net/wireless/adm8211.c
@@ -1661,7 +1661,9 @@ static void adm8211_tx_raw(struct ieee80211_hw *dev, struct sk_buff *skb,
 }
 
 /* Put adm8211_tx_hdr on skb and transmit */
-static void adm8211_tx(struct ieee80211_hw *dev, struct sk_buff *skb)
+static void adm8211_tx(struct ieee80211_hw *dev,
+		       struct ieee80211_tx_control *control,
+		       struct sk_buff *skb)
 {
 	struct adm8211_tx_hdr *txhdr;
 	size_t payload_len, hdrlen;
diff --git a/drivers/net/wireless/at76c50x-usb.c b/drivers/net/wireless/at76c50x-usb.c
index efc162e0b511..abb520dec032 100644
--- a/drivers/net/wireless/at76c50x-usb.c
+++ b/drivers/net/wireless/at76c50x-usb.c
@@ -1726,7 +1726,9 @@ static void at76_mac80211_tx_callback(struct urb *urb)
 	ieee80211_wake_queues(priv->hw);
 }
 
-static void at76_mac80211_tx(struct ieee80211_hw *hw, struct sk_buff *skb)
+static void at76_mac80211_tx(struct ieee80211_hw *hw,
+			     struct ieee80211_tx_control *control,
+			     struct sk_buff *skb)
 {
 	struct at76_priv *priv = hw->priv;
 	struct at76_tx_buffer *tx_buffer = priv->bulk_out_buffer;
diff --git a/drivers/net/wireless/ath/ath5k/mac80211-ops.c b/drivers/net/wireless/ath/ath5k/mac80211-ops.c
index 260e7dc7f751..934f04c1cb9d 100644
--- a/drivers/net/wireless/ath/ath5k/mac80211-ops.c
+++ b/drivers/net/wireless/ath/ath5k/mac80211-ops.c
@@ -55,7 +55,8 @@
 \********************/
 
 static void
-ath5k_tx(struct ieee80211_hw *hw, struct sk_buff *skb)
+ath5k_tx(struct ieee80211_hw *hw, struct ieee80211_tx_control *control,
+	 struct sk_buff *skb)
 {
 	struct ath5k_hw *ah = hw->priv;
 	u16 qnum = skb_get_queue_mapping(skb);
diff --git a/drivers/net/wireless/ath/ath9k/ath9k.h b/drivers/net/wireless/ath/ath9k/ath9k.h
index b09285c36c4a..7373e4b92c92 100644
--- a/drivers/net/wireless/ath/ath9k/ath9k.h
+++ b/drivers/net/wireless/ath/ath9k/ath9k.h
@@ -280,6 +280,7 @@ struct ath_tx_control {
 	struct ath_txq *txq;
 	struct ath_node *an;
 	u8 paprd;
+	struct ieee80211_sta *sta;
 };
 
 #define ATH_TX_ERROR        0x01
diff --git a/drivers/net/wireless/ath/ath9k/htc.h b/drivers/net/wireless/ath/ath9k/htc.h
index 936e920fb88e..b30596fcf73a 100644
--- a/drivers/net/wireless/ath/ath9k/htc.h
+++ b/drivers/net/wireless/ath/ath9k/htc.h
@@ -542,6 +542,7 @@ void ath9k_htc_stop_ani(struct ath9k_htc_priv *priv);
 
 int ath9k_tx_init(struct ath9k_htc_priv *priv);
 int ath9k_htc_tx_start(struct ath9k_htc_priv *priv,
+		       struct ieee80211_sta *sta,
 		       struct sk_buff *skb, u8 slot, bool is_cab);
 void ath9k_tx_cleanup(struct ath9k_htc_priv *priv);
 bool ath9k_htc_txq_setup(struct ath9k_htc_priv *priv, int subtype);
diff --git a/drivers/net/wireless/ath/ath9k/htc_drv_beacon.c b/drivers/net/wireless/ath/ath9k/htc_drv_beacon.c
index 77d541feb910..f42d2eb6af99 100644
--- a/drivers/net/wireless/ath/ath9k/htc_drv_beacon.c
+++ b/drivers/net/wireless/ath/ath9k/htc_drv_beacon.c
@@ -326,7 +326,7 @@ static void ath9k_htc_send_buffered(struct ath9k_htc_priv *priv,
 			goto next;
 		}
 
-		ret = ath9k_htc_tx_start(priv, skb, tx_slot, true);
+		ret = ath9k_htc_tx_start(priv, NULL, skb, tx_slot, true);
 		if (ret != 0) {
 			ath9k_htc_tx_clear_slot(priv, tx_slot);
 			dev_kfree_skb_any(skb);
diff --git a/drivers/net/wireless/ath/ath9k/htc_drv_main.c b/drivers/net/wireless/ath/ath9k/htc_drv_main.c
index c785129692ff..8a0ccf70aa14 100644
--- a/drivers/net/wireless/ath/ath9k/htc_drv_main.c
+++ b/drivers/net/wireless/ath/ath9k/htc_drv_main.c
@@ -856,7 +856,9 @@ set_timer:
 /* mac80211 Callbacks */
 /**********************/
 
-static void ath9k_htc_tx(struct ieee80211_hw *hw, struct sk_buff *skb)
+static void ath9k_htc_tx(struct ieee80211_hw *hw,
+			 struct ieee80211_tx_control *control,
+			 struct sk_buff *skb)
 {
 	struct ieee80211_hdr *hdr;
 	struct ath9k_htc_priv *priv = hw->priv;
@@ -883,7 +885,7 @@ static void ath9k_htc_tx(struct ieee80211_hw *hw, struct sk_buff *skb)
 		goto fail_tx;
 	}
 
-	ret = ath9k_htc_tx_start(priv, skb, slot, false);
+	ret = ath9k_htc_tx_start(priv, control->sta, skb, slot, false);
 	if (ret != 0) {
 		ath_dbg(common, XMIT, "Tx failed\n");
 		goto clear_slot;
diff --git a/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c b/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c
index 47e61d0da33b..06cdcb772d78 100644
--- a/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c
+++ b/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c
@@ -333,12 +333,12 @@ static void ath9k_htc_tx_data(struct ath9k_htc_priv *priv,
 }
 
 int ath9k_htc_tx_start(struct ath9k_htc_priv *priv,
+		       struct ieee80211_sta *sta,
 		       struct sk_buff *skb,
 		       u8 slot, bool is_cab)
 {
 	struct ieee80211_hdr *hdr;
 	struct ieee80211_tx_info *tx_info = IEEE80211_SKB_CB(skb);
-	struct ieee80211_sta *sta = tx_info->control.sta;
 	struct ieee80211_vif *vif = tx_info->control.vif;
 	struct ath9k_htc_sta *ista;
 	struct ath9k_htc_vif *avp = NULL;
diff --git a/drivers/net/wireless/ath/ath9k/main.c b/drivers/net/wireless/ath/ath9k/main.c
index 6049d8b82855..4d8dc9ff5a75 100644
--- a/drivers/net/wireless/ath/ath9k/main.c
+++ b/drivers/net/wireless/ath/ath9k/main.c
@@ -694,7 +694,9 @@ mutex_unlock:
 	return r;
 }
 
-static void ath9k_tx(struct ieee80211_hw *hw, struct sk_buff *skb)
+static void ath9k_tx(struct ieee80211_hw *hw,
+		     struct ieee80211_tx_control *control,
+		     struct sk_buff *skb)
 {
 	struct ath_softc *sc = hw->priv;
 	struct ath_common *common = ath9k_hw_common(sc->sc_ah);
@@ -754,6 +756,7 @@ static void ath9k_tx(struct ieee80211_hw *hw, struct sk_buff *skb)
 
 	memset(&txctl, 0, sizeof(struct ath_tx_control));
 	txctl.txq = sc->tx.txq_map[skb_get_queue_mapping(skb)];
+	txctl.sta = control->sta;
 
 	ath_dbg(common, XMIT, "transmitting packet, skb: %p\n", skb);
 
diff --git a/drivers/net/wireless/ath/ath9k/xmit.c b/drivers/net/wireless/ath/ath9k/xmit.c
index 2c9da6b2ecb1..ef91f6cc2d79 100644
--- a/drivers/net/wireless/ath/ath9k/xmit.c
+++ b/drivers/net/wireless/ath/ath9k/xmit.c
@@ -1773,11 +1773,12 @@ static void ath_tx_send_normal(struct ath_softc *sc, struct ath_txq *txq,
 	TX_STAT_INC(txq->axq_qnum, queued);
 }
 
-static void setup_frame_info(struct ieee80211_hw *hw, struct sk_buff *skb,
+static void setup_frame_info(struct ieee80211_hw *hw,
+			     struct ieee80211_sta *sta,
+			     struct sk_buff *skb,
 			     int framelen)
 {
 	struct ieee80211_tx_info *tx_info = IEEE80211_SKB_CB(skb);
-	struct ieee80211_sta *sta = tx_info->control.sta;
 	struct ieee80211_key_conf *hw_key = tx_info->control.hw_key;
 	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
 	const struct ieee80211_rate *rate;
@@ -1935,7 +1936,7 @@ int ath_tx_start(struct ieee80211_hw *hw, struct sk_buff *skb,
 {
 	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
 	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
-	struct ieee80211_sta *sta = info->control.sta;
+	struct ieee80211_sta *sta = txctl->sta;
 	struct ieee80211_vif *vif = info->control.vif;
 	struct ath_softc *sc = hw->priv;
 	struct ath_txq *txq = txctl->txq;
@@ -1979,7 +1980,7 @@ int ath_tx_start(struct ieee80211_hw *hw, struct sk_buff *skb,
 	    !ieee80211_is_data(hdr->frame_control))
 		info->flags |= IEEE80211_TX_CTL_CLEAR_PS_FILT;
 
-	setup_frame_info(hw, skb, frmlen);
+	setup_frame_info(hw, sta, skb, frmlen);
 
 	/*
 	 * At this point, the vif, hw_key and sta pointers in the tx control
diff --git a/drivers/net/wireless/ath/carl9170/carl9170.h b/drivers/net/wireless/ath/carl9170/carl9170.h
index 376be11161c0..8f0cbc35816f 100644
--- a/drivers/net/wireless/ath/carl9170/carl9170.h
+++ b/drivers/net/wireless/ath/carl9170/carl9170.h
@@ -577,7 +577,9 @@ void carl9170_rx(struct ar9170 *ar, void *buf, unsigned int len);
 void carl9170_handle_command_response(struct ar9170 *ar, void *buf, u32 len);
 
 /* TX */
-void carl9170_op_tx(struct ieee80211_hw *hw, struct sk_buff *skb);
+void carl9170_op_tx(struct ieee80211_hw *hw,
+		    struct ieee80211_tx_control *control,
+		    struct sk_buff *skb);
 void carl9170_tx_janitor(struct work_struct *work);
 void carl9170_tx_process_status(struct ar9170 *ar,
 				const struct carl9170_rsp *cmd);
diff --git a/drivers/net/wireless/ath/carl9170/tx.c b/drivers/net/wireless/ath/carl9170/tx.c
index 6a8681407a1d..84377cf580e0 100644
--- a/drivers/net/wireless/ath/carl9170/tx.c
+++ b/drivers/net/wireless/ath/carl9170/tx.c
@@ -867,14 +867,15 @@ static bool carl9170_tx_cts_check(struct ar9170 *ar,
 	return false;
 }
 
-static int carl9170_tx_prepare(struct ar9170 *ar, struct sk_buff *skb)
+static int carl9170_tx_prepare(struct ar9170 *ar,
+			       struct ieee80211_sta *sta,
+			       struct sk_buff *skb)
 {
 	struct ieee80211_hdr *hdr;
 	struct _carl9170_tx_superframe *txc;
 	struct carl9170_vif_info *cvif;
 	struct ieee80211_tx_info *info;
 	struct ieee80211_tx_rate *txrate;
-	struct ieee80211_sta *sta;
 	struct carl9170_tx_info *arinfo;
 	unsigned int hw_queue;
 	int i;
@@ -910,8 +911,6 @@ static int carl9170_tx_prepare(struct ar9170 *ar, struct sk_buff *skb)
 	else
 		cvif = NULL;
 
-	sta = info->control.sta;
-
 	txc = (void *)skb_push(skb, sizeof(*txc));
 	memset(txc, 0, sizeof(*txc));
 
@@ -1457,20 +1456,21 @@ err_unlock_rcu:
 	return false;
 }
 
-void carl9170_op_tx(struct ieee80211_hw *hw, struct sk_buff *skb)
+void carl9170_op_tx(struct ieee80211_hw *hw,
+		    struct ieee80211_tx_control *control,
+		    struct sk_buff *skb)
 {
 	struct ar9170 *ar = hw->priv;
 	struct ieee80211_tx_info *info;
-	struct ieee80211_sta *sta;
+	struct ieee80211_sta *sta = control->sta;
 	bool run;
 
 	if (unlikely(!IS_STARTED(ar)))
 		goto err_free;
 
 	info = IEEE80211_SKB_CB(skb);
-	sta = info->control.sta;
 
-	if (unlikely(carl9170_tx_prepare(ar, skb)))
+	if (unlikely(carl9170_tx_prepare(ar, sta, skb)))
 		goto err_free;
 
 	carl9170_tx_accounting(ar, skb);
diff --git a/drivers/net/wireless/b43/main.c b/drivers/net/wireless/b43/main.c
index b80352b308d5..2ca4e885f5ff 100644
--- a/drivers/net/wireless/b43/main.c
+++ b/drivers/net/wireless/b43/main.c
@@ -3407,7 +3407,8 @@ static void b43_tx_work(struct work_struct *work)
 }
 
 static void b43_op_tx(struct ieee80211_hw *hw,
-		     struct sk_buff *skb)
+		      struct ieee80211_tx_control *control,
+		      struct sk_buff *skb)
 {
 	struct b43_wl *wl = hw_to_b43_wl(hw);
 
diff --git a/drivers/net/wireless/b43legacy/main.c b/drivers/net/wireless/b43legacy/main.c
index 8156135a0590..74d4c20cc31b 100644
--- a/drivers/net/wireless/b43legacy/main.c
+++ b/drivers/net/wireless/b43legacy/main.c
@@ -2492,6 +2492,7 @@ static void b43legacy_tx_work(struct work_struct *work)
 }
 
 static void b43legacy_op_tx(struct ieee80211_hw *hw,
+			    struct ieee80211_tx_control *control,
 			    struct sk_buff *skb)
 {
 	struct b43legacy_wl *wl = hw_to_b43legacy_wl(hw);
diff --git a/drivers/net/wireless/brcm80211/brcmsmac/mac80211_if.c b/drivers/net/wireless/brcm80211/brcmsmac/mac80211_if.c
index 9e79d47e077f..a7be68d2eaf7 100644
--- a/drivers/net/wireless/brcm80211/brcmsmac/mac80211_if.c
+++ b/drivers/net/wireless/brcm80211/brcmsmac/mac80211_if.c
@@ -264,7 +264,9 @@ static void brcms_set_basic_rate(struct brcm_rateset *rs, u16 rate, bool is_br)
 	}
 }
 
-static void brcms_ops_tx(struct ieee80211_hw *hw, struct sk_buff *skb)
+static void brcms_ops_tx(struct ieee80211_hw *hw,
+			 struct ieee80211_tx_control *control,
+			 struct sk_buff *skb)
 {
 	struct brcms_info *wl = hw->priv;
 	struct ieee80211_tx_info *tx_info = IEEE80211_SKB_CB(skb);
@@ -276,7 +278,7 @@ static void brcms_ops_tx(struct ieee80211_hw *hw, struct sk_buff *skb)
 		goto done;
 	}
 	brcms_c_sendpkt_mac80211(wl->wlc, skb, hw);
-	tx_info->rate_driver_data[0] = tx_info->control.sta;
+	tx_info->rate_driver_data[0] = control->sta;
  done:
 	spin_unlock_bh(&wl->lock);
 }
diff --git a/drivers/net/wireless/iwlegacy/3945-mac.c b/drivers/net/wireless/iwlegacy/3945-mac.c
index faec40467208..e252acb9c862 100644
--- a/drivers/net/wireless/iwlegacy/3945-mac.c
+++ b/drivers/net/wireless/iwlegacy/3945-mac.c
@@ -460,7 +460,9 @@ il3945_build_tx_cmd_basic(struct il_priv *il, struct il_device_cmd *cmd,
  * start C_TX command process
  */
 static int
-il3945_tx_skb(struct il_priv *il, struct sk_buff *skb)
+il3945_tx_skb(struct il_priv *il,
+	      struct ieee80211_sta *sta,
+	      struct sk_buff *skb)
 {
 	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
 	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
@@ -512,7 +514,7 @@ il3945_tx_skb(struct il_priv *il, struct sk_buff *skb)
 	hdr_len = ieee80211_hdrlen(fc);
 
 	/* Find idx into station table for destination station */
-	sta_id = il_sta_id_or_broadcast(il, info->control.sta);
+	sta_id = il_sta_id_or_broadcast(il, sta);
 	if (sta_id == IL_INVALID_STATION) {
 		D_DROP("Dropping - INVALID STATION: %pM\n", hdr->addr1);
 		goto drop;
@@ -2859,7 +2861,9 @@ il3945_mac_stop(struct ieee80211_hw *hw)
 }
 
 static void
-il3945_mac_tx(struct ieee80211_hw *hw, struct sk_buff *skb)
+il3945_mac_tx(struct ieee80211_hw *hw,
+	       struct ieee80211_tx_control *control,
+	       struct sk_buff *skb)
 {
 	struct il_priv *il = hw->priv;
 
@@ -2868,7 +2872,7 @@ il3945_mac_tx(struct ieee80211_hw *hw, struct sk_buff *skb)
 	D_TX("dev->xmit(%d bytes) at rate 0x%02x\n", skb->len,
 	     ieee80211_get_tx_rate(hw, IEEE80211_SKB_CB(skb))->bitrate);
 
-	if (il3945_tx_skb(il, skb))
+	if (il3945_tx_skb(il, control->sta, skb))
 		dev_kfree_skb_any(skb);
 
 	D_MAC80211("leave\n");
diff --git a/drivers/net/wireless/iwlegacy/4965-mac.c b/drivers/net/wireless/iwlegacy/4965-mac.c
index 34f61a0581a2..eac4dc8bc879 100644
--- a/drivers/net/wireless/iwlegacy/4965-mac.c
+++ b/drivers/net/wireless/iwlegacy/4965-mac.c
@@ -1526,8 +1526,11 @@ il4965_tx_cmd_build_basic(struct il_priv *il, struct sk_buff *skb,
 }
 
 static void
-il4965_tx_cmd_build_rate(struct il_priv *il, struct il_tx_cmd *tx_cmd,
-			 struct ieee80211_tx_info *info, __le16 fc)
+il4965_tx_cmd_build_rate(struct il_priv *il,
+			 struct il_tx_cmd *tx_cmd,
+			 struct ieee80211_tx_info *info,
+			 struct ieee80211_sta *sta,
+			 __le16 fc)
 {
 	const u8 rts_retry_limit = 60;
 	u32 rate_flags;
@@ -1561,9 +1564,7 @@ il4965_tx_cmd_build_rate(struct il_priv *il, struct il_tx_cmd *tx_cmd,
 	rate_idx = info->control.rates[0].idx;
 	if ((info->control.rates[0].flags & IEEE80211_TX_RC_MCS) || rate_idx < 0
 	    || rate_idx > RATE_COUNT_LEGACY)
-		rate_idx =
-		    rate_lowest_index(&il->bands[info->band],
-				      info->control.sta);
+		rate_idx = rate_lowest_index(&il->bands[info->band], sta);
 	/* For 5 GHZ band, remap mac80211 rate indices into driver indices */
 	if (info->band == IEEE80211_BAND_5GHZ)
 		rate_idx += IL_FIRST_OFDM_RATE;
@@ -1630,11 +1631,12 @@ il4965_tx_cmd_build_hwcrypto(struct il_priv *il, struct ieee80211_tx_info *info,
  * start C_TX command process
  */
 int
-il4965_tx_skb(struct il_priv *il, struct sk_buff *skb)
+il4965_tx_skb(struct il_priv *il,
+	      struct ieee80211_sta *sta,
+	      struct sk_buff *skb)
 {
 	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
 	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
-	struct ieee80211_sta *sta = info->control.sta;
 	struct il_station_priv *sta_priv = NULL;
 	struct il_tx_queue *txq;
 	struct il_queue *q;
@@ -1680,7 +1682,7 @@ il4965_tx_skb(struct il_priv *il, struct sk_buff *skb)
 		sta_id = il->hw_params.bcast_id;
 	else {
 		/* Find idx into station table for destination station */
-		sta_id = il_sta_id_or_broadcast(il, info->control.sta);
+		sta_id = il_sta_id_or_broadcast(il, sta);
 
 		if (sta_id == IL_INVALID_STATION) {
 			D_DROP("Dropping - INVALID STATION: %pM\n", hdr->addr1);
@@ -1786,7 +1788,7 @@ il4965_tx_skb(struct il_priv *il, struct sk_buff *skb)
 	/* TODO need this for burst mode later on */
 	il4965_tx_cmd_build_basic(il, skb, tx_cmd, info, hdr, sta_id);
 
-	il4965_tx_cmd_build_rate(il, tx_cmd, info, fc);
+	il4965_tx_cmd_build_rate(il, tx_cmd, info, sta, fc);
 
 	il_update_stats(il, true, fc, len);
 	/*
@@ -5828,7 +5830,9 @@ il4965_mac_stop(struct ieee80211_hw *hw)
 }
 
 void
-il4965_mac_tx(struct ieee80211_hw *hw, struct sk_buff *skb)
+il4965_mac_tx(struct ieee80211_hw *hw,
+	      struct ieee80211_tx_control *control,
+	      struct sk_buff *skb)
 {
 	struct il_priv *il = hw->priv;
 
@@ -5837,7 +5841,7 @@ il4965_mac_tx(struct ieee80211_hw *hw, struct sk_buff *skb)
 	D_TX("dev->xmit(%d bytes) at rate 0x%02x\n", skb->len,
 	     ieee80211_get_tx_rate(hw, IEEE80211_SKB_CB(skb))->bitrate);
 
-	if (il4965_tx_skb(il, skb))
+	if (il4965_tx_skb(il, control->sta, skb))
 		dev_kfree_skb_any(skb);
 
 	D_MACDUMP("leave\n");
diff --git a/drivers/net/wireless/iwlegacy/4965.h b/drivers/net/wireless/iwlegacy/4965.h
index 1db677689cfe..2d092f328547 100644
--- a/drivers/net/wireless/iwlegacy/4965.h
+++ b/drivers/net/wireless/iwlegacy/4965.h
@@ -78,7 +78,9 @@ int il4965_hw_txq_attach_buf_to_tfd(struct il_priv *il, struct il_tx_queue *txq,
 int il4965_hw_tx_queue_init(struct il_priv *il, struct il_tx_queue *txq);
 void il4965_hwrate_to_tx_control(struct il_priv *il, u32 rate_n_flags,
 				 struct ieee80211_tx_info *info);
-int il4965_tx_skb(struct il_priv *il, struct sk_buff *skb);
+int il4965_tx_skb(struct il_priv *il,
+		  struct ieee80211_sta *sta,
+		  struct sk_buff *skb);
 int il4965_tx_agg_start(struct il_priv *il, struct ieee80211_vif *vif,
 			struct ieee80211_sta *sta, u16 tid, u16 * ssn);
 int il4965_tx_agg_stop(struct il_priv *il, struct ieee80211_vif *vif,
@@ -163,7 +165,9 @@ void il4965_eeprom_release_semaphore(struct il_priv *il);
 int il4965_eeprom_check_version(struct il_priv *il);
 
 /* mac80211 handlers (for 4965) */
-void il4965_mac_tx(struct ieee80211_hw *hw, struct sk_buff *skb);
+void il4965_mac_tx(struct ieee80211_hw *hw,
+		   struct ieee80211_tx_control *control,
+		   struct sk_buff *skb);
 int il4965_mac_start(struct ieee80211_hw *hw);
 void il4965_mac_stop(struct ieee80211_hw *hw);
 void il4965_configure_filter(struct ieee80211_hw *hw,
diff --git a/drivers/net/wireless/iwlwifi/dvm/agn.h b/drivers/net/wireless/iwlwifi/dvm/agn.h
index 9bb16bdf6d26..f0b8c1f7591c 100644
--- a/drivers/net/wireless/iwlwifi/dvm/agn.h
+++ b/drivers/net/wireless/iwlwifi/dvm/agn.h
@@ -201,7 +201,9 @@ void iwl_chswitch_done(struct iwl_priv *priv, bool is_success);
 
 
 /* tx */
-int iwlagn_tx_skb(struct iwl_priv *priv, struct sk_buff *skb);
+int iwlagn_tx_skb(struct iwl_priv *priv,
+		  struct ieee80211_sta *sta,
+		  struct sk_buff *skb);
 int iwlagn_tx_agg_start(struct iwl_priv *priv, struct ieee80211_vif *vif,
 			struct ieee80211_sta *sta, u16 tid, u16 *ssn);
 int iwlagn_tx_agg_oper(struct iwl_priv *priv, struct ieee80211_vif *vif,
diff --git a/drivers/net/wireless/iwlwifi/dvm/mac80211.c b/drivers/net/wireless/iwlwifi/dvm/mac80211.c
index a5f7bce96325..e64af60a37c1 100644
--- a/drivers/net/wireless/iwlwifi/dvm/mac80211.c
+++ b/drivers/net/wireless/iwlwifi/dvm/mac80211.c
@@ -511,14 +511,16 @@ static void iwlagn_mac_set_wakeup(struct ieee80211_hw *hw, bool enabled)
 }
 #endif
 
-static void iwlagn_mac_tx(struct ieee80211_hw *hw, struct sk_buff *skb)
+static void iwlagn_mac_tx(struct ieee80211_hw *hw,
+			  struct ieee80211_tx_control *control,
+			  struct sk_buff *skb)
 {
 	struct iwl_priv *priv = IWL_MAC80211_GET_DVM(hw);
 
 	IWL_DEBUG_TX(priv, "dev->xmit(%d bytes) at rate 0x%02x\n", skb->len,
 		     ieee80211_get_tx_rate(hw, IEEE80211_SKB_CB(skb))->bitrate);
 
-	if (iwlagn_tx_skb(priv, skb))
+	if (iwlagn_tx_skb(priv, control->sta, skb))
 		dev_kfree_skb_any(skb);
 }
 
diff --git a/drivers/net/wireless/iwlwifi/dvm/tx.c b/drivers/net/wireless/iwlwifi/dvm/tx.c
index 5971a23aa47d..d17799b316d7 100644
--- a/drivers/net/wireless/iwlwifi/dvm/tx.c
+++ b/drivers/net/wireless/iwlwifi/dvm/tx.c
@@ -127,6 +127,7 @@ static void iwlagn_tx_cmd_build_basic(struct iwl_priv *priv,
 static void iwlagn_tx_cmd_build_rate(struct iwl_priv *priv,
 				     struct iwl_tx_cmd *tx_cmd,
 				     struct ieee80211_tx_info *info,
+				     struct ieee80211_sta *sta,
 				     __le16 fc)
 {
 	u32 rate_flags;
@@ -187,8 +188,7 @@ static void iwlagn_tx_cmd_build_rate(struct iwl_priv *priv,
 	if (info->control.rates[0].flags & IEEE80211_TX_RC_MCS ||
 			(rate_idx < 0) || (rate_idx > IWL_RATE_COUNT_LEGACY))
 		rate_idx = rate_lowest_index(
-				&priv->eeprom_data->bands[info->band],
-				info->control.sta);
+				&priv->eeprom_data->bands[info->band], sta);
 	/* For 5 GHZ band, remap mac80211 rate indices into driver indices */
 	if (info->band == IEEE80211_BAND_5GHZ)
 		rate_idx += IWL_FIRST_OFDM_RATE;
@@ -291,7 +291,9 @@ static int iwl_sta_id_or_broadcast(struct iwl_rxon_context *context,
 /*
  * start REPLY_TX command process
  */
-int iwlagn_tx_skb(struct iwl_priv *priv, struct sk_buff *skb)
+int iwlagn_tx_skb(struct iwl_priv *priv,
+		  struct ieee80211_sta *sta,
+		  struct sk_buff *skb)
 {
 	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
 	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
@@ -345,7 +347,7 @@ int iwlagn_tx_skb(struct iwl_priv *priv, struct sk_buff *skb)
 		sta_id = ctx->bcast_sta_id;
 	else {
 		/* Find index into station table for destination station */
-		sta_id = iwl_sta_id_or_broadcast(ctx, info->control.sta);
+		sta_id = iwl_sta_id_or_broadcast(ctx, sta);
 		if (sta_id == IWL_INVALID_STATION) {
 			IWL_DEBUG_DROP(priv, "Dropping - INVALID STATION: %pM\n",
 				       hdr->addr1);
@@ -355,8 +357,8 @@ int iwlagn_tx_skb(struct iwl_priv *priv, struct sk_buff *skb)
 
 	IWL_DEBUG_TX(priv, "station Id %d\n", sta_id);
 
-	if (info->control.sta)
-		sta_priv = (void *)info->control.sta->drv_priv;
+	if (sta)
+		sta_priv = (void *)sta->drv_priv;
 
 	if (sta_priv && sta_priv->asleep &&
 	    (info->flags & IEEE80211_TX_CTL_NO_PS_BUFFER)) {
@@ -397,7 +399,7 @@ int iwlagn_tx_skb(struct iwl_priv *priv, struct sk_buff *skb)
 	/* TODO need this for burst mode later on */
 	iwlagn_tx_cmd_build_basic(priv, skb, tx_cmd, info, hdr, sta_id);
 
-	iwlagn_tx_cmd_build_rate(priv, tx_cmd, info, fc);
+	iwlagn_tx_cmd_build_rate(priv, tx_cmd, info, sta, fc);
 
 	memset(&info->status, 0, sizeof(info->status));
 
diff --git a/drivers/net/wireless/libertas_tf/main.c b/drivers/net/wireless/libertas_tf/main.c
index a03457292c88..7001856241e6 100644
--- a/drivers/net/wireless/libertas_tf/main.c
+++ b/drivers/net/wireless/libertas_tf/main.c
@@ -227,7 +227,9 @@ static void lbtf_free_adapter(struct lbtf_private *priv)
 	lbtf_deb_leave(LBTF_DEB_MAIN);
 }
 
-static void lbtf_op_tx(struct ieee80211_hw *hw, struct sk_buff *skb)
+static void lbtf_op_tx(struct ieee80211_hw *hw,
+		       struct ieee80211_tx_control *control,
+		       struct sk_buff *skb)
 {
 	struct lbtf_private *priv = hw->priv;
 
diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c
index 643f968b05ee..ed1386aa3b17 100644
--- a/drivers/net/wireless/mac80211_hwsim.c
+++ b/drivers/net/wireless/mac80211_hwsim.c
@@ -709,7 +709,9 @@ static bool mac80211_hwsim_tx_frame_no_nl(struct ieee80211_hw *hw,
 	return ack;
 }
 
-static void mac80211_hwsim_tx(struct ieee80211_hw *hw, struct sk_buff *skb)
+static void mac80211_hwsim_tx(struct ieee80211_hw *hw,
+			      struct ieee80211_tx_control *control,
+			      struct sk_buff *skb)
 {
 	bool ack;
 	struct ieee80211_tx_info *txi;
@@ -741,8 +743,8 @@ static void mac80211_hwsim_tx(struct ieee80211_hw *hw, struct sk_buff *skb)
 
 	if (txi->control.vif)
 		hwsim_check_magic(txi->control.vif);
-	if (txi->control.sta)
-		hwsim_check_sta_magic(txi->control.sta);
+	if (control->sta)
+		hwsim_check_sta_magic(control->sta);
 
 	ieee80211_tx_info_clear_status(txi);
 
diff --git a/drivers/net/wireless/mwl8k.c b/drivers/net/wireless/mwl8k.c
index 224e03ade145..5099e5375cb3 100644
--- a/drivers/net/wireless/mwl8k.c
+++ b/drivers/net/wireless/mwl8k.c
@@ -1830,12 +1830,14 @@ static inline void mwl8k_tx_count_packet(struct ieee80211_sta *sta, u8 tid)
 }
 
 static void
-mwl8k_txq_xmit(struct ieee80211_hw *hw, int index, struct sk_buff *skb)
+mwl8k_txq_xmit(struct ieee80211_hw *hw,
+	       int index,
+	       struct ieee80211_sta *sta,
+	       struct sk_buff *skb)
 {
 	struct mwl8k_priv *priv = hw->priv;
 	struct ieee80211_tx_info *tx_info;
 	struct mwl8k_vif *mwl8k_vif;
-	struct ieee80211_sta *sta;
 	struct ieee80211_hdr *wh;
 	struct mwl8k_tx_queue *txq;
 	struct mwl8k_tx_desc *tx;
@@ -1867,7 +1869,6 @@ mwl8k_txq_xmit(struct ieee80211_hw *hw, int index, struct sk_buff *skb)
 	wh = &((struct mwl8k_dma_data *)skb->data)->wh;
 
 	tx_info = IEEE80211_SKB_CB(skb);
-	sta = tx_info->control.sta;
 	mwl8k_vif = MWL8K_VIF(tx_info->control.vif);
 
 	if (tx_info->flags & IEEE80211_TX_CTL_ASSIGN_SEQ) {
@@ -2019,8 +2020,8 @@ mwl8k_txq_xmit(struct ieee80211_hw *hw, int index, struct sk_buff *skb)
 	tx->pkt_phys_addr = cpu_to_le32(dma);
 	tx->pkt_len = cpu_to_le16(skb->len);
 	tx->rate_info = 0;
-	if (!priv->ap_fw && tx_info->control.sta != NULL)
-		tx->peer_id = MWL8K_STA(tx_info->control.sta)->peer_id;
+	if (!priv->ap_fw && sta != NULL)
+		tx->peer_id = MWL8K_STA(sta)->peer_id;
 	else
 		tx->peer_id = 0;
 
@@ -4364,7 +4365,9 @@ static void mwl8k_rx_poll(unsigned long data)
 /*
  * Core driver operations.
  */
-static void mwl8k_tx(struct ieee80211_hw *hw, struct sk_buff *skb)
+static void mwl8k_tx(struct ieee80211_hw *hw,
+		     struct ieee80211_tx_control *control,
+		     struct sk_buff *skb)
 {
 	struct mwl8k_priv *priv = hw->priv;
 	int index = skb_get_queue_mapping(skb);
@@ -4376,7 +4379,7 @@ static void mwl8k_tx(struct ieee80211_hw *hw, struct sk_buff *skb)
 		return;
 	}
 
-	mwl8k_txq_xmit(hw, index, skb);
+	mwl8k_txq_xmit(hw, index, control->sta, skb);
 }
 
 static int mwl8k_start(struct ieee80211_hw *hw)
diff --git a/drivers/net/wireless/p54/lmac.h b/drivers/net/wireless/p54/lmac.h
index 3d8d622bec55..de1d46bf97df 100644
--- a/drivers/net/wireless/p54/lmac.h
+++ b/drivers/net/wireless/p54/lmac.h
@@ -526,7 +526,9 @@ int p54_init_leds(struct p54_common *priv);
 void p54_unregister_leds(struct p54_common *priv);
 
 /* xmit functions */
-void p54_tx_80211(struct ieee80211_hw *dev, struct sk_buff *skb);
+void p54_tx_80211(struct ieee80211_hw *dev,
+		  struct ieee80211_tx_control *control,
+		  struct sk_buff *skb);
 int p54_tx_cancel(struct p54_common *priv, __le32 req_id);
 void p54_tx(struct p54_common *priv, struct sk_buff *skb);
 
diff --git a/drivers/net/wireless/p54/main.c b/drivers/net/wireless/p54/main.c
index 7cffea795ad2..5e91ad06dd5d 100644
--- a/drivers/net/wireless/p54/main.c
+++ b/drivers/net/wireless/p54/main.c
@@ -158,7 +158,7 @@ static int p54_beacon_update(struct p54_common *priv,
 	 * to cancel the old beacon template by hand, instead the firmware
 	 * will release the previous one through the feedback mechanism.
 	 */
-	p54_tx_80211(priv->hw, beacon);
+	p54_tx_80211(priv->hw, NULL, beacon);
 	priv->tsf_high32 = 0;
 	priv->tsf_low32 = 0;
 
diff --git a/drivers/net/wireless/p54/txrx.c b/drivers/net/wireless/p54/txrx.c
index f38786e02623..5861e13a6fd8 100644
--- a/drivers/net/wireless/p54/txrx.c
+++ b/drivers/net/wireless/p54/txrx.c
@@ -676,8 +676,9 @@ int p54_rx(struct ieee80211_hw *dev, struct sk_buff *skb)
 EXPORT_SYMBOL_GPL(p54_rx);
 
 static void p54_tx_80211_header(struct p54_common *priv, struct sk_buff *skb,
-				struct ieee80211_tx_info *info, u8 *queue,
-				u32 *extra_len, u16 *flags, u16 *aid,
+				struct ieee80211_tx_info *info,
+				struct ieee80211_sta *sta,
+				u8 *queue, u32 *extra_len, u16 *flags, u16 *aid,
 				bool *burst_possible)
 {
 	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
@@ -746,8 +747,8 @@ static void p54_tx_80211_header(struct p54_common *priv, struct sk_buff *skb,
 			}
 		}
 
-		if (info->control.sta)
-			*aid = info->control.sta->aid;
+		if (sta)
+			*aid = sta->aid;
 		break;
 	}
 }
@@ -767,7 +768,9 @@ static u8 p54_convert_algo(u32 cipher)
 	}
 }
 
-void p54_tx_80211(struct ieee80211_hw *dev, struct sk_buff *skb)
+void p54_tx_80211(struct ieee80211_hw *dev,
+		  struct ieee80211_tx_control *control,
+		  struct sk_buff *skb)
 {
 	struct p54_common *priv = dev->priv;
 	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
@@ -784,7 +787,7 @@ void p54_tx_80211(struct ieee80211_hw *dev, struct sk_buff *skb)
 	u8 nrates = 0, nremaining = 8;
 	bool burst_allowed = false;
 
-	p54_tx_80211_header(priv, skb, info, &queue, &extra_len,
+	p54_tx_80211_header(priv, skb, info, control->sta, &queue, &extra_len,
 			    &hdr_flags, &aid, &burst_allowed);
 
 	if (p54_tx_qos_accounting_alloc(priv, skb, queue)) {
diff --git a/drivers/net/wireless/rt2x00/rt2x00.h b/drivers/net/wireless/rt2x00/rt2x00.h
index 8afb546c2b2d..f991e8bedc70 100644
--- a/drivers/net/wireless/rt2x00/rt2x00.h
+++ b/drivers/net/wireless/rt2x00/rt2x00.h
@@ -1287,7 +1287,9 @@ void rt2x00lib_rxdone(struct queue_entry *entry, gfp_t gfp);
 /*
  * mac80211 handlers.
  */
-void rt2x00mac_tx(struct ieee80211_hw *hw, struct sk_buff *skb);
+void rt2x00mac_tx(struct ieee80211_hw *hw,
+		  struct ieee80211_tx_control *control,
+		  struct sk_buff *skb);
 int rt2x00mac_start(struct ieee80211_hw *hw);
 void rt2x00mac_stop(struct ieee80211_hw *hw);
 int rt2x00mac_add_interface(struct ieee80211_hw *hw,
diff --git a/drivers/net/wireless/rt2x00/rt2x00dev.c b/drivers/net/wireless/rt2x00/rt2x00dev.c
index a6b88bd4a1a5..a59048ffa092 100644
--- a/drivers/net/wireless/rt2x00/rt2x00dev.c
+++ b/drivers/net/wireless/rt2x00/rt2x00dev.c
@@ -194,7 +194,7 @@ static void rt2x00lib_bc_buffer_iter(void *data, u8 *mac,
 	 */
 	skb = ieee80211_get_buffered_bc(rt2x00dev->hw, vif);
 	while (skb) {
-		rt2x00mac_tx(rt2x00dev->hw, skb);
+		rt2x00mac_tx(rt2x00dev->hw, NULL, skb);
 		skb = ieee80211_get_buffered_bc(rt2x00dev->hw, vif);
 	}
 }
diff --git a/drivers/net/wireless/rt2x00/rt2x00mac.c b/drivers/net/wireless/rt2x00/rt2x00mac.c
index 4ff26c2159bf..c3d0f2f87b69 100644
--- a/drivers/net/wireless/rt2x00/rt2x00mac.c
+++ b/drivers/net/wireless/rt2x00/rt2x00mac.c
@@ -99,7 +99,9 @@ static int rt2x00mac_tx_rts_cts(struct rt2x00_dev *rt2x00dev,
 	return retval;
 }
 
-void rt2x00mac_tx(struct ieee80211_hw *hw, struct sk_buff *skb)
+void rt2x00mac_tx(struct ieee80211_hw *hw,
+		  struct ieee80211_tx_control *control,
+		  struct sk_buff *skb)
 {
 	struct rt2x00_dev *rt2x00dev = hw->priv;
 	struct ieee80211_tx_info *tx_info = IEEE80211_SKB_CB(skb);
diff --git a/drivers/net/wireless/rt2x00/rt2x00queue.c b/drivers/net/wireless/rt2x00/rt2x00queue.c
index f7e74a0a7759..e488b944a034 100644
--- a/drivers/net/wireless/rt2x00/rt2x00queue.c
+++ b/drivers/net/wireless/rt2x00/rt2x00queue.c
@@ -315,6 +315,7 @@ static void rt2x00queue_create_tx_descriptor_plcp(struct rt2x00_dev *rt2x00dev,
 static void rt2x00queue_create_tx_descriptor_ht(struct rt2x00_dev *rt2x00dev,
 						struct sk_buff *skb,
 						struct txentry_desc *txdesc,
+						struct ieee80211_sta *sta,
 						const struct rt2x00_rate *hwrate)
 {
 	struct ieee80211_tx_info *tx_info = IEEE80211_SKB_CB(skb);
@@ -322,11 +323,11 @@ static void rt2x00queue_create_tx_descriptor_ht(struct rt2x00_dev *rt2x00dev,
 	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
 	struct rt2x00_sta *sta_priv = NULL;
 
-	if (tx_info->control.sta) {
+	if (sta) {
 		txdesc->u.ht.mpdu_density =
-		    tx_info->control.sta->ht_cap.ampdu_density;
+		    sta->ht_cap.ampdu_density;
 
-		sta_priv = sta_to_rt2x00_sta(tx_info->control.sta);
+		sta_priv = sta_to_rt2x00_sta(sta);
 		txdesc->u.ht.wcid = sta_priv->wcid;
 	}
 
@@ -341,8 +342,8 @@ static void rt2x00queue_create_tx_descriptor_ht(struct rt2x00_dev *rt2x00dev,
 		 * MIMO PS should be set to 1 for STA's using dynamic SM PS
 		 * when using more then one tx stream (>MCS7).
 		 */
-		if (tx_info->control.sta && txdesc->u.ht.mcs > 7 &&
-		    ((tx_info->control.sta->ht_cap.cap &
+		if (sta && txdesc->u.ht.mcs > 7 &&
+		    ((sta->ht_cap.cap &
 		      IEEE80211_HT_CAP_SM_PS) >>
 		     IEEE80211_HT_CAP_SM_PS_SHIFT) ==
 		    WLAN_HT_CAP_SM_PS_DYNAMIC)
@@ -409,7 +410,8 @@ static void rt2x00queue_create_tx_descriptor_ht(struct rt2x00_dev *rt2x00dev,
 
 static void rt2x00queue_create_tx_descriptor(struct rt2x00_dev *rt2x00dev,
 					     struct sk_buff *skb,
-					     struct txentry_desc *txdesc)
+					     struct txentry_desc *txdesc,
+					     struct ieee80211_sta *sta)
 {
 	struct ieee80211_tx_info *tx_info = IEEE80211_SKB_CB(skb);
 	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
@@ -503,7 +505,7 @@ static void rt2x00queue_create_tx_descriptor(struct rt2x00_dev *rt2x00dev,
 
 	if (test_bit(REQUIRE_HT_TX_DESC, &rt2x00dev->cap_flags))
 		rt2x00queue_create_tx_descriptor_ht(rt2x00dev, skb, txdesc,
-						    hwrate);
+						   sta, hwrate);
 	else
 		rt2x00queue_create_tx_descriptor_plcp(rt2x00dev, skb, txdesc,
 						      hwrate);
@@ -595,7 +597,7 @@ int rt2x00queue_write_tx_frame(struct data_queue *queue, struct sk_buff *skb,
 	 * after that we are free to use the skb->cb array
 	 * for our information.
 	 */
-	rt2x00queue_create_tx_descriptor(queue->rt2x00dev, skb, &txdesc);
+	rt2x00queue_create_tx_descriptor(queue->rt2x00dev, skb, &txdesc, NULL);
 
 	/*
 	 * All information is retrieved from the skb->cb array,
@@ -740,7 +742,7 @@ int rt2x00queue_update_beacon_locked(struct rt2x00_dev *rt2x00dev,
 	 * after that we are free to use the skb->cb array
 	 * for our information.
 	 */
-	rt2x00queue_create_tx_descriptor(rt2x00dev, intf->beacon->skb, &txdesc);
+	rt2x00queue_create_tx_descriptor(rt2x00dev, intf->beacon->skb, &txdesc, NULL);
 
 	/*
 	 * Fill in skb descriptor
diff --git a/drivers/net/wireless/rtl818x/rtl8180/dev.c b/drivers/net/wireless/rtl818x/rtl8180/dev.c
index aceaf689f737..021d83e1b1d3 100644
--- a/drivers/net/wireless/rtl818x/rtl8180/dev.c
+++ b/drivers/net/wireless/rtl818x/rtl8180/dev.c
@@ -244,7 +244,9 @@ static irqreturn_t rtl8180_interrupt(int irq, void *dev_id)
 	return IRQ_HANDLED;
 }
 
-static void rtl8180_tx(struct ieee80211_hw *dev, struct sk_buff *skb)
+static void rtl8180_tx(struct ieee80211_hw *dev,
+		       struct ieee80211_tx_control *control,
+		       struct sk_buff *skb)
 {
 	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
 	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
@@ -710,7 +712,7 @@ static void rtl8180_beacon_work(struct work_struct *work)
 	/* TODO: use actual beacon queue */
 	skb_set_queue_mapping(skb, 0);
 
-	rtl8180_tx(dev, skb);
+	rtl8180_tx(dev, NULL, skb);
 
 resched:
 	/*
diff --git a/drivers/net/wireless/rtl818x/rtl8187/dev.c b/drivers/net/wireless/rtl818x/rtl8187/dev.c
index 71a30b026089..05d8ca045afd 100644
--- a/drivers/net/wireless/rtl818x/rtl8187/dev.c
+++ b/drivers/net/wireless/rtl818x/rtl8187/dev.c
@@ -228,7 +228,9 @@ static void rtl8187_tx_cb(struct urb *urb)
 	}
 }
 
-static void rtl8187_tx(struct ieee80211_hw *dev, struct sk_buff *skb)
+static void rtl8187_tx(struct ieee80211_hw *dev,
+		       struct ieee80211_tx_control *control,
+		       struct sk_buff *skb)
 {
 	struct rtl8187_priv *priv = dev->priv;
 	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
@@ -1076,7 +1078,7 @@ static void rtl8187_beacon_work(struct work_struct *work)
 	/* TODO: use actual beacon queue */
 	skb_set_queue_mapping(skb, 0);
 
-	rtl8187_tx(dev, skb);
+	rtl8187_tx(dev, NULL, skb);
 
 resched:
 	/*
diff --git a/drivers/net/wireless/rtlwifi/base.c b/drivers/net/wireless/rtlwifi/base.c
index 942e56b77b60..59381fe8ed06 100644
--- a/drivers/net/wireless/rtlwifi/base.c
+++ b/drivers/net/wireless/rtlwifi/base.c
@@ -1341,9 +1341,8 @@ int rtl_send_smps_action(struct ieee80211_hw *hw,
 		rtlpriv->cfg->ops->update_rate_tbl(hw, sta, 0);
 
 		info->control.rates[0].idx = 0;
-		info->control.sta = sta;
 		info->band = hw->conf.channel->band;
-		rtlpriv->intf_ops->adapter_tx(hw, skb, &tcb_desc);
+		rtlpriv->intf_ops->adapter_tx(hw, sta, skb, &tcb_desc);
 	}
 err_free:
 	return 0;
diff --git a/drivers/net/wireless/rtlwifi/core.c b/drivers/net/wireless/rtlwifi/core.c
index a18ad2a98938..a7c0e52869ba 100644
--- a/drivers/net/wireless/rtlwifi/core.c
+++ b/drivers/net/wireless/rtlwifi/core.c
@@ -124,7 +124,9 @@ static void rtl_op_stop(struct ieee80211_hw *hw)
 	mutex_unlock(&rtlpriv->locks.conf_mutex);
 }
 
-static void rtl_op_tx(struct ieee80211_hw *hw, struct sk_buff *skb)
+static void rtl_op_tx(struct ieee80211_hw *hw,
+		      struct ieee80211_tx_control *control,
+		      struct sk_buff *skb)
 {
 	struct rtl_priv *rtlpriv = rtl_priv(hw);
 	struct rtl_hal *rtlhal = rtl_hal(rtl_priv(hw));
@@ -138,8 +140,8 @@ static void rtl_op_tx(struct ieee80211_hw *hw, struct sk_buff *skb)
 	if (!test_bit(RTL_STATUS_INTERFACE_START, &rtlpriv->status))
 		goto err_free;
 
-	if (!rtlpriv->intf_ops->waitq_insert(hw, skb))
-		rtlpriv->intf_ops->adapter_tx(hw, skb, &tcb_desc);
+	if (!rtlpriv->intf_ops->waitq_insert(hw, control->sta, skb))
+		rtlpriv->intf_ops->adapter_tx(hw, control->sta, skb, &tcb_desc);
 
 	return;
 
diff --git a/drivers/net/wireless/rtlwifi/pci.c b/drivers/net/wireless/rtlwifi/pci.c
index 80f75d3ba84a..aad9d44c0a51 100644
--- a/drivers/net/wireless/rtlwifi/pci.c
+++ b/drivers/net/wireless/rtlwifi/pci.c
@@ -504,7 +504,7 @@ static void _rtl_pci_tx_chk_waitq(struct ieee80211_hw *hw)
 				_rtl_update_earlymode_info(hw, skb,
 							   &tcb_desc, tid);
 
-			rtlpriv->intf_ops->adapter_tx(hw, skb, &tcb_desc);
+			rtlpriv->intf_ops->adapter_tx(hw, NULL, skb, &tcb_desc);
 		}
 	}
 }
@@ -929,7 +929,7 @@ static void _rtl_pci_prepare_bcn_tasklet(struct ieee80211_hw *hw)
 	info = IEEE80211_SKB_CB(pskb);
 	pdesc = &ring->desc[0];
 	rtlpriv->cfg->ops->fill_tx_desc(hw, hdr, (u8 *) pdesc,
-		info, pskb, BEACON_QUEUE, &tcb_desc);
+		info, NULL, pskb, BEACON_QUEUE, &tcb_desc);
 
 	__skb_queue_tail(&ring->queue, pskb);
 
@@ -1305,11 +1305,10 @@ int rtl_pci_reset_trx_ring(struct ieee80211_hw *hw)
 }
 
 static bool rtl_pci_tx_chk_waitq_insert(struct ieee80211_hw *hw,
+					struct ieee80211_sta *sta,
 					struct sk_buff *skb)
 {
 	struct rtl_priv *rtlpriv = rtl_priv(hw);
-	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
-	struct ieee80211_sta *sta = info->control.sta;
 	struct rtl_sta_info *sta_entry = NULL;
 	u8 tid = rtl_get_tid(skb);
 
@@ -1337,13 +1336,14 @@ static bool rtl_pci_tx_chk_waitq_insert(struct ieee80211_hw *hw,
 	return true;
 }
 
-static int rtl_pci_tx(struct ieee80211_hw *hw, struct sk_buff *skb,
-		struct rtl_tcb_desc *ptcb_desc)
+static int rtl_pci_tx(struct ieee80211_hw *hw,
+		      struct ieee80211_sta *sta,
+		      struct sk_buff *skb,
+		      struct rtl_tcb_desc *ptcb_desc)
 {
 	struct rtl_priv *rtlpriv = rtl_priv(hw);
 	struct rtl_sta_info *sta_entry = NULL;
 	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
-	struct ieee80211_sta *sta = info->control.sta;
 	struct rtl8192_tx_ring *ring;
 	struct rtl_tx_desc *pdesc;
 	u8 idx;
@@ -1418,7 +1418,7 @@ static int rtl_pci_tx(struct ieee80211_hw *hw, struct sk_buff *skb,
 		rtlpriv->cfg->ops->led_control(hw, LED_CTL_TX);
 
 	rtlpriv->cfg->ops->fill_tx_desc(hw, hdr, (u8 *)pdesc,
-			info, skb, hw_queue, ptcb_desc);
+			info, sta, skb, hw_queue, ptcb_desc);
 
 	__skb_queue_tail(&ring->queue, skb);
 
diff --git a/drivers/net/wireless/rtlwifi/rtl8192ce/trx.c b/drivers/net/wireless/rtlwifi/rtl8192ce/trx.c
index 52166640f167..390d6d4fcaa0 100644
--- a/drivers/net/wireless/rtlwifi/rtl8192ce/trx.c
+++ b/drivers/net/wireless/rtlwifi/rtl8192ce/trx.c
@@ -596,7 +596,9 @@ bool rtl92ce_rx_query_desc(struct ieee80211_hw *hw,
 
 void rtl92ce_tx_fill_desc(struct ieee80211_hw *hw,
 			  struct ieee80211_hdr *hdr, u8 *pdesc_tx,
-			  struct ieee80211_tx_info *info, struct sk_buff *skb,
+			  struct ieee80211_tx_info *info,
+			  struct ieee80211_sta *sta,
+			  struct sk_buff *skb,
 			  u8 hw_queue, struct rtl_tcb_desc *tcb_desc)
 {
 	struct rtl_priv *rtlpriv = rtl_priv(hw);
@@ -604,7 +606,6 @@ void rtl92ce_tx_fill_desc(struct ieee80211_hw *hw,
 	struct rtl_pci *rtlpci = rtl_pcidev(rtl_pcipriv(hw));
 	struct rtl_ps_ctl *ppsc = rtl_psc(rtl_priv(hw));
 	bool defaultadapter = true;
-	struct ieee80211_sta *sta;
 	u8 *pdesc = pdesc_tx;
 	u16 seq_number;
 	__le16 fc = hdr->frame_control;
diff --git a/drivers/net/wireless/rtlwifi/rtl8192ce/trx.h b/drivers/net/wireless/rtlwifi/rtl8192ce/trx.h
index c4adb9777365..a7cdd514cb2e 100644
--- a/drivers/net/wireless/rtlwifi/rtl8192ce/trx.h
+++ b/drivers/net/wireless/rtlwifi/rtl8192ce/trx.h
@@ -713,6 +713,7 @@ struct rx_desc_92c {
 void rtl92ce_tx_fill_desc(struct ieee80211_hw *hw,
 			  struct ieee80211_hdr *hdr,
 			  u8 *pdesc, struct ieee80211_tx_info *info,
+			  struct ieee80211_sta *sta,
 			  struct sk_buff *skb, u8 hw_queue,
 			  struct rtl_tcb_desc *ptcb_desc);
 bool rtl92ce_rx_query_desc(struct ieee80211_hw *hw,
diff --git a/drivers/net/wireless/rtlwifi/rtl8192cu/trx.c b/drivers/net/wireless/rtlwifi/rtl8192cu/trx.c
index 2e6eb356a93e..27863d773790 100644
--- a/drivers/net/wireless/rtlwifi/rtl8192cu/trx.c
+++ b/drivers/net/wireless/rtlwifi/rtl8192cu/trx.c
@@ -496,7 +496,9 @@ static void _rtl_tx_desc_checksum(u8 *txdesc)
 
 void rtl92cu_tx_fill_desc(struct ieee80211_hw *hw,
 			  struct ieee80211_hdr *hdr, u8 *pdesc_tx,
-			  struct ieee80211_tx_info *info, struct sk_buff *skb,
+			  struct ieee80211_tx_info *info,
+			  struct ieee80211_sta *sta,
+			  struct sk_buff *skb,
 			  u8 queue_index,
 			  struct rtl_tcb_desc *tcb_desc)
 {
@@ -504,7 +506,6 @@ void rtl92cu_tx_fill_desc(struct ieee80211_hw *hw,
 	struct rtl_mac *mac = rtl_mac(rtl_priv(hw));
 	struct rtl_ps_ctl *ppsc = rtl_psc(rtl_priv(hw));
 	bool defaultadapter = true;
-	struct ieee80211_sta *sta = info->control.sta = info->control.sta;
 	u8 *qc = ieee80211_get_qos_ctl(hdr);
 	u8 tid = qc[0] & IEEE80211_QOS_CTL_TID_MASK;
 	u16 seq_number;
diff --git a/drivers/net/wireless/rtlwifi/rtl8192cu/trx.h b/drivers/net/wireless/rtlwifi/rtl8192cu/trx.h
index 332b06e78b00..725c53accc58 100644
--- a/drivers/net/wireless/rtlwifi/rtl8192cu/trx.h
+++ b/drivers/net/wireless/rtlwifi/rtl8192cu/trx.h
@@ -420,7 +420,9 @@ struct sk_buff *rtl8192c_tx_aggregate_hdl(struct ieee80211_hw *,
 					   struct sk_buff_head *);
 void rtl92cu_tx_fill_desc(struct ieee80211_hw *hw,
 			  struct ieee80211_hdr *hdr, u8 *pdesc_tx,
-			  struct ieee80211_tx_info *info, struct sk_buff *skb,
+			  struct ieee80211_tx_info *info,
+			  struct ieee80211_sta *sta,
+			  struct sk_buff *skb,
 			  u8 queue_index,
 			  struct rtl_tcb_desc *tcb_desc);
 void rtl92cu_fill_fake_txdesc(struct ieee80211_hw *hw, u8 * pDesc,
diff --git a/drivers/net/wireless/rtlwifi/rtl8192de/trx.c b/drivers/net/wireless/rtlwifi/rtl8192de/trx.c
index f80690d82c11..4686f340b9d6 100644
--- a/drivers/net/wireless/rtlwifi/rtl8192de/trx.c
+++ b/drivers/net/wireless/rtlwifi/rtl8192de/trx.c
@@ -551,7 +551,9 @@ static void _rtl92de_insert_emcontent(struct rtl_tcb_desc *ptcb_desc,
 
 void rtl92de_tx_fill_desc(struct ieee80211_hw *hw,
 			  struct ieee80211_hdr *hdr, u8 *pdesc_tx,
-			  struct ieee80211_tx_info *info, struct sk_buff *skb,
+			  struct ieee80211_tx_info *info,
+			  struct ieee80211_sta *sta,
+			  struct sk_buff *skb,
 			  u8 hw_queue, struct rtl_tcb_desc *ptcb_desc)
 {
 	struct rtl_priv *rtlpriv = rtl_priv(hw);
@@ -559,7 +561,6 @@ void rtl92de_tx_fill_desc(struct ieee80211_hw *hw,
 	struct rtl_pci *rtlpci = rtl_pcidev(rtl_pcipriv(hw));
 	struct rtl_hal *rtlhal = rtl_hal(rtlpriv);
 	struct rtl_ps_ctl *ppsc = rtl_psc(rtl_priv(hw));
-	struct ieee80211_sta *sta = info->control.sta;
 	u8 *pdesc = pdesc_tx;
 	u16 seq_number;
 	__le16 fc = hdr->frame_control;
diff --git a/drivers/net/wireless/rtlwifi/rtl8192de/trx.h b/drivers/net/wireless/rtlwifi/rtl8192de/trx.h
index 057a52431b00..c1b5dfb79d53 100644
--- a/drivers/net/wireless/rtlwifi/rtl8192de/trx.h
+++ b/drivers/net/wireless/rtlwifi/rtl8192de/trx.h
@@ -730,6 +730,7 @@ struct rx_desc_92d {
 void rtl92de_tx_fill_desc(struct ieee80211_hw *hw,
 			  struct ieee80211_hdr *hdr,
 			  u8 *pdesc, struct ieee80211_tx_info *info,
+			  struct ieee80211_sta *sta,
 			  struct sk_buff *skb, u8 hw_queue,
 			  struct rtl_tcb_desc *ptcb_desc);
 bool rtl92de_rx_query_desc(struct ieee80211_hw *hw,
diff --git a/drivers/net/wireless/rtlwifi/rtl8192se/trx.c b/drivers/net/wireless/rtlwifi/rtl8192se/trx.c
index 36d1cb3aef8a..28c53fb12aeb 100644
--- a/drivers/net/wireless/rtlwifi/rtl8192se/trx.c
+++ b/drivers/net/wireless/rtlwifi/rtl8192se/trx.c
@@ -591,14 +591,15 @@ bool rtl92se_rx_query_desc(struct ieee80211_hw *hw, struct rtl_stats *stats,
 
 void rtl92se_tx_fill_desc(struct ieee80211_hw *hw,
 		struct ieee80211_hdr *hdr, u8 *pdesc_tx,
-		struct ieee80211_tx_info *info, struct sk_buff *skb,
+		struct ieee80211_tx_info *info,
+		struct ieee80211_sta *sta,
+		struct sk_buff *skb,
 		u8 hw_queue, struct rtl_tcb_desc *ptcb_desc)
 {
 	struct rtl_priv *rtlpriv = rtl_priv(hw);
 	struct rtl_mac *mac = rtl_mac(rtl_priv(hw));
 	struct rtl_pci *rtlpci = rtl_pcidev(rtl_pcipriv(hw));
 	struct rtl_hal *rtlhal = rtl_hal(rtl_priv(hw));
-	struct ieee80211_sta *sta = info->control.sta;
 	u8 *pdesc = pdesc_tx;
 	u16 seq_number;
 	__le16 fc = hdr->frame_control;
diff --git a/drivers/net/wireless/rtlwifi/rtl8192se/trx.h b/drivers/net/wireless/rtlwifi/rtl8192se/trx.h
index 011e7b0695f2..64dd66f287c1 100644
--- a/drivers/net/wireless/rtlwifi/rtl8192se/trx.h
+++ b/drivers/net/wireless/rtlwifi/rtl8192se/trx.h
@@ -31,6 +31,7 @@
 
 void rtl92se_tx_fill_desc(struct ieee80211_hw *hw, struct ieee80211_hdr *hdr,
 			  u8 *pdesc, struct ieee80211_tx_info *info,
+			  struct ieee80211_sta *sta,
 			  struct sk_buff *skb, u8 hw_queue,
 			  struct rtl_tcb_desc *ptcb_desc);
 void rtl92se_tx_fill_cmddesc(struct ieee80211_hw *hw, u8 *pdesc, bool firstseg,
diff --git a/drivers/net/wireless/rtlwifi/usb.c b/drivers/net/wireless/rtlwifi/usb.c
index aa970fc18a21..914046903cfd 100644
--- a/drivers/net/wireless/rtlwifi/usb.c
+++ b/drivers/net/wireless/rtlwifi/usb.c
@@ -848,8 +848,10 @@ static void _rtl_usb_transmit(struct ieee80211_hw *hw, struct sk_buff *skb,
 	_rtl_submit_tx_urb(hw, _urb);
 }
 
-static void _rtl_usb_tx_preprocess(struct ieee80211_hw *hw, struct sk_buff *skb,
-			    u16 hw_queue)
+static void _rtl_usb_tx_preprocess(struct ieee80211_hw *hw,
+				   struct ieee80211_sta *sta,
+				   struct sk_buff *skb,
+				   u16 hw_queue)
 {
 	struct rtl_priv *rtlpriv = rtl_priv(hw);
 	struct rtl_mac *mac = rtl_mac(rtl_priv(hw));
@@ -891,7 +893,7 @@ static void _rtl_usb_tx_preprocess(struct ieee80211_hw *hw, struct sk_buff *skb,
 		seq_number += 1;
 		seq_number <<= 4;
 	}
-	rtlpriv->cfg->ops->fill_tx_desc(hw, hdr, (u8 *)pdesc, info, skb,
+	rtlpriv->cfg->ops->fill_tx_desc(hw, hdr, (u8 *)pdesc, info, sta, skb,
 					hw_queue, &tcb_desc);
 	if (!ieee80211_has_morefrags(hdr->frame_control)) {
 		if (qc)
@@ -901,7 +903,9 @@ static void _rtl_usb_tx_preprocess(struct ieee80211_hw *hw, struct sk_buff *skb,
 		rtlpriv->cfg->ops->led_control(hw, LED_CTL_TX);
 }
 
-static int rtl_usb_tx(struct ieee80211_hw *hw, struct sk_buff *skb,
+static int rtl_usb_tx(struct ieee80211_hw *hw,
+		      struct ieee80211_sta *sta,
+		      struct sk_buff *skb,
 		      struct rtl_tcb_desc *dummy)
 {
 	struct rtl_usb *rtlusb = rtl_usbdev(rtl_usbpriv(hw));
@@ -913,7 +917,7 @@ static int rtl_usb_tx(struct ieee80211_hw *hw, struct sk_buff *skb,
 	if (unlikely(is_hal_stop(rtlhal)))
 		goto err_free;
 	hw_queue = rtlusb->usb_mq_to_hwq(fc, skb_get_queue_mapping(skb));
-	_rtl_usb_tx_preprocess(hw, skb, hw_queue);
+	_rtl_usb_tx_preprocess(hw, sta, skb, hw_queue);
 	_rtl_usb_transmit(hw, skb, hw_queue);
 	return NETDEV_TX_OK;
 
@@ -923,6 +927,7 @@ err_free:
 }
 
 static bool rtl_usb_tx_chk_waitq_insert(struct ieee80211_hw *hw,
+					struct ieee80211_sta *sta,
 					struct sk_buff *skb)
 {
 	return false;
diff --git a/drivers/net/wireless/rtlwifi/wifi.h b/drivers/net/wireless/rtlwifi/wifi.h
index cdaa21f29710..40153e7bf702 100644
--- a/drivers/net/wireless/rtlwifi/wifi.h
+++ b/drivers/net/wireless/rtlwifi/wifi.h
@@ -122,7 +122,7 @@ enum rt_eeprom_type {
 	EEPROM_BOOT_EFUSE,
 };
 
-enum rtl_status {
+enum ttl_status {
 	RTL_STATUS_INTERFACE_START = 0,
 };
 
@@ -1418,6 +1418,7 @@ struct rtl_hal_ops {
 	void (*fill_tx_desc) (struct ieee80211_hw *hw,
 			      struct ieee80211_hdr *hdr, u8 *pdesc_tx,
 			      struct ieee80211_tx_info *info,
+			      struct ieee80211_sta *sta,
 			      struct sk_buff *skb, u8 hw_queue,
 			      struct rtl_tcb_desc *ptcb_desc);
 	void (*fill_fake_txdesc) (struct ieee80211_hw *hw, u8 *pDesc,
@@ -1475,11 +1476,15 @@ struct rtl_intf_ops {
 	int (*adapter_start) (struct ieee80211_hw *hw);
 	void (*adapter_stop) (struct ieee80211_hw *hw);
 
-	int (*adapter_tx) (struct ieee80211_hw *hw, struct sk_buff *skb,
-			struct rtl_tcb_desc *ptcb_desc);
+	int (*adapter_tx) (struct ieee80211_hw *hw,
+			   struct ieee80211_sta *sta,
+			   struct sk_buff *skb,
+			   struct rtl_tcb_desc *ptcb_desc);
 	void (*flush)(struct ieee80211_hw *hw, bool drop);
 	int (*reset_trx_ring) (struct ieee80211_hw *hw);
-	bool (*waitq_insert) (struct ieee80211_hw *hw, struct sk_buff *skb);
+	bool (*waitq_insert) (struct ieee80211_hw *hw,
+			      struct ieee80211_sta *sta,
+			      struct sk_buff *skb);
 
 	/*pci */
 	void (*disable_aspm) (struct ieee80211_hw *hw);
diff --git a/drivers/net/wireless/ti/wl1251/main.c b/drivers/net/wireless/ti/wl1251/main.c
index 3118c425bcf1..441cbccbd381 100644
--- a/drivers/net/wireless/ti/wl1251/main.c
+++ b/drivers/net/wireless/ti/wl1251/main.c
@@ -354,7 +354,9 @@ out:
 	return ret;
 }
 
-static void wl1251_op_tx(struct ieee80211_hw *hw, struct sk_buff *skb)
+static void wl1251_op_tx(struct ieee80211_hw *hw,
+			 struct ieee80211_tx_control *control,
+			 struct sk_buff *skb)
 {
 	struct wl1251 *wl = hw->priv;
 	unsigned long flags;
diff --git a/drivers/net/wireless/ti/wlcore/main.c b/drivers/net/wireless/ti/wlcore/main.c
index 5efd5919db3b..ff830cf50c70 100644
--- a/drivers/net/wireless/ti/wlcore/main.c
+++ b/drivers/net/wireless/ti/wlcore/main.c
@@ -1181,7 +1181,9 @@ out:
 	return ret;
 }
 
-static void wl1271_op_tx(struct ieee80211_hw *hw, struct sk_buff *skb)
+static void wl1271_op_tx(struct ieee80211_hw *hw,
+			 struct ieee80211_tx_control *control,
+			 struct sk_buff *skb)
 {
 	struct wl1271 *wl = hw->priv;
 	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
@@ -1197,7 +1199,7 @@ static void wl1271_op_tx(struct ieee80211_hw *hw, struct sk_buff *skb)
 	mapping = skb_get_queue_mapping(skb);
 	q = wl1271_tx_get_queue(mapping);
 
-	hlid = wl12xx_tx_get_hlid(wl, wlvif, skb, info->control.sta);
+	hlid = wl12xx_tx_get_hlid(wl, wlvif, skb, control->sta);
 
 	spin_lock_irqsave(&wl->wl_lock, flags);
 
diff --git a/drivers/net/wireless/zd1211rw/zd_mac.c b/drivers/net/wireless/zd1211rw/zd_mac.c
index c9e2660e1263..459880104758 100644
--- a/drivers/net/wireless/zd1211rw/zd_mac.c
+++ b/drivers/net/wireless/zd1211rw/zd_mac.c
@@ -937,7 +937,9 @@ static int fill_ctrlset(struct zd_mac *mac,
  * control block of the skbuff will be initialized. If necessary the incoming
  * mac80211 queues will be stopped.
  */
-static void zd_op_tx(struct ieee80211_hw *hw, struct sk_buff *skb)
+static void zd_op_tx(struct ieee80211_hw *hw,
+		     struct ieee80211_tx_control *control,
+		     struct sk_buff *skb)
 {
 	struct zd_mac *mac = zd_hw_mac(hw);
 	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
@@ -1176,7 +1178,7 @@ static void zd_beacon_done(struct zd_mac *mac)
 		skb = ieee80211_get_buffered_bc(mac->hw, mac->vif);
 		if (!skb)
 			break;
-		zd_op_tx(mac->hw, skb);
+		zd_op_tx(mac->hw, NULL, skb);
 	}
 
 	/*
diff --git a/drivers/staging/winbond/wbusb.c b/drivers/staging/winbond/wbusb.c
index ef360547ecec..b76d95e180fa 100644
--- a/drivers/staging/winbond/wbusb.c
+++ b/drivers/staging/winbond/wbusb.c
@@ -119,7 +119,9 @@ static void wbsoft_configure_filter(struct ieee80211_hw *dev,
 	*total_flags = new_flags;
 }
 
-static void wbsoft_tx(struct ieee80211_hw *dev, struct sk_buff *skb)
+static void wbsoft_tx(struct ieee80211_hw *dev,
+		      struct ieee80211_tx_control *control,
+		      struct sk_buff *skb)
 {
 	struct wbsoft_priv *priv = dev->priv;
 
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index d67d3bbe21c1..8114f590f715 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -527,9 +527,6 @@ struct ieee80211_tx_rate {
  *  (2) driver internal use (if applicable)
  *  (3) TX status information - driver tells mac80211 what happened
  *
- * The TX control's sta pointer is only valid during the ->tx call,
- * it may be NULL.
- *
  * @flags: transmit info flags, defined above
  * @band: the band to transmit on (use for checking for races)
  * @hw_queue: HW queue to put the frame on, skb_get_queue_mapping() gives the AC
@@ -560,6 +557,7 @@ struct ieee80211_tx_info {
 					struct ieee80211_tx_rate rates[
 						IEEE80211_TX_MAX_RATES];
 					s8 rts_cts_rate_idx;
+					/* 3 bytes free */
 				};
 				/* only needed before rate control */
 				unsigned long jiffies;
@@ -567,7 +565,7 @@ struct ieee80211_tx_info {
 			/* NB: vif can be NULL for injected frames */
 			struct ieee80211_vif *vif;
 			struct ieee80211_key_conf *hw_key;
-			struct ieee80211_sta *sta;
+			/* 8 bytes free */
 		} control;
 		struct {
 			struct ieee80211_tx_rate rates[IEEE80211_TX_MAX_RATES];
@@ -1078,6 +1076,16 @@ enum sta_notify_cmd {
 	STA_NOTIFY_SLEEP, STA_NOTIFY_AWAKE,
 };
 
+/**
+ * struct ieee80211_tx_control - TX control data
+ *
+ * @sta: station table entry, this sta pointer may be NULL and
+ * 	it is not allowed to copy the pointer, due to RCU.
+ */
+struct ieee80211_tx_control {
+	struct ieee80211_sta *sta;
+};
+
 /**
  * enum ieee80211_hw_flags - hardware flags
  *
@@ -2269,7 +2277,9 @@ enum ieee80211_rate_control_changed {
  *	The callback is optional and can (should!) sleep.
  */
 struct ieee80211_ops {
-	void (*tx)(struct ieee80211_hw *hw, struct sk_buff *skb);
+	void (*tx)(struct ieee80211_hw *hw,
+		   struct ieee80211_tx_control *control,
+		   struct sk_buff *skb);
 	int (*start)(struct ieee80211_hw *hw);
 	void (*stop)(struct ieee80211_hw *hw);
 #ifdef CONFIG_PM
diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h
index df9203199102..a81117a83996 100644
--- a/net/mac80211/driver-ops.h
+++ b/net/mac80211/driver-ops.h
@@ -22,9 +22,11 @@ get_bss_sdata(struct ieee80211_sub_if_data *sdata)
 	return sdata;
 }
 
-static inline void drv_tx(struct ieee80211_local *local, struct sk_buff *skb)
+static inline void drv_tx(struct ieee80211_local *local,
+			  struct ieee80211_tx_control *control,
+			  struct sk_buff *skb)
 {
-	local->ops->tx(&local->hw, skb);
+	local->ops->tx(&local->hw, control, skb);
 }
 
 static inline void drv_get_et_strings(struct ieee80211_sub_if_data *sdata,
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index acf712ffb5e6..7558ba58ea23 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -1204,6 +1204,7 @@ static bool ieee80211_tx_frags(struct ieee80211_local *local,
 			       struct sk_buff_head *skbs,
 			       bool txpending)
 {
+	struct ieee80211_tx_control control;
 	struct sk_buff *skb, *tmp;
 	unsigned long flags;
 
@@ -1240,10 +1241,10 @@ static bool ieee80211_tx_frags(struct ieee80211_local *local,
 		spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags);
 
 		info->control.vif = vif;
-		info->control.sta = sta;
+		control.sta = sta;
 
 		__skb_unlink(skb, skbs);
-		drv_tx(local, skb);
+		drv_tx(local, &control, skb);
 	}
 
 	return true;
-- 
cgit v1.2.3


From 4778e0be16c291ba6d9d55eeff3a6764fc84a071 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@resnulli.us>
Date: Fri, 27 Jul 2012 06:28:52 +0000
Subject: netlink: add signed types

Signed types might be needed in NL communication from time to time
(I need s32 in team driver), so add them.

Signed-off-by: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netlink.h | 98 +++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 98 insertions(+)

(limited to 'include')

diff --git a/include/net/netlink.h b/include/net/netlink.h
index 785f37a3b44e..09175d5d1fbf 100644
--- a/include/net/netlink.h
+++ b/include/net/netlink.h
@@ -98,6 +98,10 @@
  *   nla_put_u16(skb, type, value)	add u16 attribute to skb
  *   nla_put_u32(skb, type, value)	add u32 attribute to skb
  *   nla_put_u64(skb, type, value)	add u64 attribute to skb
+ *   nla_put_s8(skb, type, value)	add s8 attribute to skb
+ *   nla_put_s16(skb, type, value)	add s16 attribute to skb
+ *   nla_put_s32(skb, type, value)	add s32 attribute to skb
+ *   nla_put_s64(skb, type, value)	add s64 attribute to skb
  *   nla_put_string(skb, type, str)	add string attribute to skb
  *   nla_put_flag(skb, type)		add flag attribute to skb
  *   nla_put_msecs(skb, type, jiffies)	add msecs attribute to skb
@@ -121,6 +125,10 @@
  *   nla_get_u16(nla)			get payload for a u16 attribute
  *   nla_get_u32(nla)			get payload for a u32 attribute
  *   nla_get_u64(nla)			get payload for a u64 attribute
+ *   nla_get_s8(nla)			get payload for a s8 attribute
+ *   nla_get_s16(nla)			get payload for a s16 attribute
+ *   nla_get_s32(nla)			get payload for a s32 attribute
+ *   nla_get_s64(nla)			get payload for a s64 attribute
  *   nla_get_flag(nla)			return 1 if flag is true
  *   nla_get_msecs(nla)			get payload for a msecs attribute
  *
@@ -160,6 +168,10 @@ enum {
 	NLA_NESTED_COMPAT,
 	NLA_NUL_STRING,
 	NLA_BINARY,
+	NLA_S8,
+	NLA_S16,
+	NLA_S32,
+	NLA_S64,
 	__NLA_TYPE_MAX,
 };
 
@@ -183,6 +195,8 @@ enum {
  *    NLA_NESTED_COMPAT    Minimum length of structure payload
  *    NLA_U8, NLA_U16,
  *    NLA_U32, NLA_U64,
+ *    NLA_S8, NLA_S16,
+ *    NLA_S32, NLA_S64,
  *    NLA_MSECS            Leaving the length field zero will verify the
  *                         given type fits, using it verifies minimum length
  *                         just like "All other"
@@ -878,6 +892,50 @@ static inline int nla_put_le64(struct sk_buff *skb, int attrtype, __le64 value)
 	return nla_put(skb, attrtype, sizeof(__le64), &value);
 }
 
+/**
+ * nla_put_s8 - Add a s8 netlink attribute to a socket buffer
+ * @skb: socket buffer to add attribute to
+ * @attrtype: attribute type
+ * @value: numeric value
+ */
+static inline int nla_put_s8(struct sk_buff *skb, int attrtype, s8 value)
+{
+	return nla_put(skb, attrtype, sizeof(s8), &value);
+}
+
+/**
+ * nla_put_s16 - Add a s16 netlink attribute to a socket buffer
+ * @skb: socket buffer to add attribute to
+ * @attrtype: attribute type
+ * @value: numeric value
+ */
+static inline int nla_put_s16(struct sk_buff *skb, int attrtype, s16 value)
+{
+	return nla_put(skb, attrtype, sizeof(s16), &value);
+}
+
+/**
+ * nla_put_s32 - Add a s32 netlink attribute to a socket buffer
+ * @skb: socket buffer to add attribute to
+ * @attrtype: attribute type
+ * @value: numeric value
+ */
+static inline int nla_put_s32(struct sk_buff *skb, int attrtype, s32 value)
+{
+	return nla_put(skb, attrtype, sizeof(s32), &value);
+}
+
+/**
+ * nla_put_s64 - Add a s64 netlink attribute to a socket buffer
+ * @skb: socket buffer to add attribute to
+ * @attrtype: attribute type
+ * @value: numeric value
+ */
+static inline int nla_put_s64(struct sk_buff *skb, int attrtype, s64 value)
+{
+	return nla_put(skb, attrtype, sizeof(s64), &value);
+}
+
 /**
  * nla_put_string - Add a string netlink attribute to a socket buffer
  * @skb: socket buffer to add attribute to
@@ -993,6 +1051,46 @@ static inline __be64 nla_get_be64(const struct nlattr *nla)
 	return tmp;
 }
 
+/**
+ * nla_get_s32 - return payload of s32 attribute
+ * @nla: s32 netlink attribute
+ */
+static inline s32 nla_get_s32(const struct nlattr *nla)
+{
+	return *(s32 *) nla_data(nla);
+}
+
+/**
+ * nla_get_s16 - return payload of s16 attribute
+ * @nla: s16 netlink attribute
+ */
+static inline s16 nla_get_s16(const struct nlattr *nla)
+{
+	return *(s16 *) nla_data(nla);
+}
+
+/**
+ * nla_get_s8 - return payload of s8 attribute
+ * @nla: s8 netlink attribute
+ */
+static inline s8 nla_get_s8(const struct nlattr *nla)
+{
+	return *(s8 *) nla_data(nla);
+}
+
+/**
+ * nla_get_s64 - return payload of s64 attribute
+ * @nla: s64 netlink attribute
+ */
+static inline s64 nla_get_s64(const struct nlattr *nla)
+{
+	s64 tmp;
+
+	nla_memcpy(&tmp, nla, sizeof(tmp));
+
+	return tmp;
+}
+
 /**
  * nla_get_flag - return payload of flag attribute
  * @nla: flag netlink attribute
-- 
cgit v1.2.3


From 69821638b27407d8648cb04de01b06b30a291bde Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@resnulli.us>
Date: Fri, 27 Jul 2012 06:28:53 +0000
Subject: team: add signed 32-bit team option type

Signed-off-by: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/team/team.c | 12 ++++++++++++
 include/linux/if_team.h |  2 ++
 2 files changed, 14 insertions(+)

(limited to 'include')

diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c
index 87707ab39430..70752e631a12 100644
--- a/drivers/net/team/team.c
+++ b/drivers/net/team/team.c
@@ -1787,6 +1787,12 @@ static int team_nl_fill_one_option_get(struct sk_buff *skb, struct team *team,
 		    nla_put_flag(skb, TEAM_ATTR_OPTION_DATA))
 			goto nest_cancel;
 		break;
+	case TEAM_OPTION_TYPE_S32:
+		if (nla_put_u8(skb, TEAM_ATTR_OPTION_TYPE, NLA_S32))
+			goto nest_cancel;
+		if (nla_put_s32(skb, TEAM_ATTR_OPTION_DATA, ctx.data.s32_val))
+			goto nest_cancel;
+		break;
 	default:
 		BUG();
 	}
@@ -1975,6 +1981,9 @@ static int team_nl_cmd_options_set(struct sk_buff *skb, struct genl_info *info)
 		case NLA_FLAG:
 			opt_type = TEAM_OPTION_TYPE_BOOL;
 			break;
+		case NLA_S32:
+			opt_type = TEAM_OPTION_TYPE_S32;
+			break;
 		default:
 			goto team_put;
 		}
@@ -2031,6 +2040,9 @@ static int team_nl_cmd_options_set(struct sk_buff *skb, struct genl_info *info)
 			case TEAM_OPTION_TYPE_BOOL:
 				ctx.data.bool_val = attr_data ? true : false;
 				break;
+			case TEAM_OPTION_TYPE_S32:
+				ctx.data.s32_val = nla_get_s32(attr_data);
+				break;
 			default:
 				BUG();
 			}
diff --git a/include/linux/if_team.h b/include/linux/if_team.h
index 6960fc1841a7..e5571c420800 100644
--- a/include/linux/if_team.h
+++ b/include/linux/if_team.h
@@ -130,6 +130,7 @@ enum team_option_type {
 	TEAM_OPTION_TYPE_STRING,
 	TEAM_OPTION_TYPE_BINARY,
 	TEAM_OPTION_TYPE_BOOL,
+	TEAM_OPTION_TYPE_S32,
 };
 
 struct team_option_inst_info {
@@ -146,6 +147,7 @@ struct team_gsetter_ctx {
 			u32 len;
 		} bin_val;
 		bool bool_val;
+		s32 s32_val;
 	} data;
 	struct team_option_inst_info *info;
 };
-- 
cgit v1.2.3


From a86fc6b7d603992070c04bd7a8c217d55688b077 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@resnulli.us>
Date: Fri, 27 Jul 2012 06:28:54 +0000
Subject: team: add per port priority option

Allow userspace to set port priority.

Signed-off-by: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/team/team.c | 25 +++++++++++++++++++++++++
 include/linux/if_team.h |  1 +
 2 files changed, 26 insertions(+)

(limited to 'include')

diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c
index 70752e631a12..a30b7c1bd9f6 100644
--- a/drivers/net/team/team.c
+++ b/drivers/net/team/team.c
@@ -1092,6 +1092,24 @@ static int team_user_linkup_en_option_set(struct team *team,
 	return 0;
 }
 
+static int team_priority_option_get(struct team *team,
+				    struct team_gsetter_ctx *ctx)
+{
+	struct team_port *port = ctx->info->port;
+
+	ctx->data.s32_val = port->priority;
+	return 0;
+}
+
+static int team_priority_option_set(struct team *team,
+				    struct team_gsetter_ctx *ctx)
+{
+	struct team_port *port = ctx->info->port;
+
+	port->priority = ctx->data.s32_val;
+	return 0;
+}
+
 static const struct team_option team_options[] = {
 	{
 		.name = "mode",
@@ -1120,6 +1138,13 @@ static const struct team_option team_options[] = {
 		.getter = team_user_linkup_en_option_get,
 		.setter = team_user_linkup_en_option_set,
 	},
+	{
+		.name = "priority",
+		.type = TEAM_OPTION_TYPE_S32,
+		.per_port = true,
+		.getter = team_priority_option_get,
+		.setter = team_priority_option_set,
+	},
 };
 
 static struct lock_class_key team_netdev_xmit_lock_key;
diff --git a/include/linux/if_team.h b/include/linux/if_team.h
index e5571c420800..06495b3e7a99 100644
--- a/include/linux/if_team.h
+++ b/include/linux/if_team.h
@@ -67,6 +67,7 @@ struct team_port {
 	struct netpoll *np;
 #endif
 
+	s32 priority; /* lower number ~ higher priority */
 	long mode_priv[0];
 };
 
-- 
cgit v1.2.3


From 8ff5105a2b9dd0ba596719b165c1827d101e5f1a Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@resnulli.us>
Date: Fri, 27 Jul 2012 06:28:55 +0000
Subject: team: add support for queue override by setting queue_id for port

Similar to what bonding has. This allows to set queue_id for port so
this port will be used when skb with matching skb->queue_mapping is
going to be transmitted.

Signed-off-by: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/team/team.c | 163 +++++++++++++++++++++++++++++++++++++++++++++++-
 include/linux/if_team.h |   4 ++
 2 files changed, 165 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c
index a30b7c1bd9f6..ba10c469b02b 100644
--- a/drivers/net/team/team.c
+++ b/drivers/net/team/team.c
@@ -658,6 +658,122 @@ static rx_handler_result_t team_handle_frame(struct sk_buff **pskb)
 }
 
 
+/*************************************
+ * Multiqueue Tx port select override
+ *************************************/
+
+static int team_queue_override_init(struct team *team)
+{
+	struct list_head *listarr;
+	unsigned int queue_cnt = team->dev->num_tx_queues - 1;
+	unsigned int i;
+
+	if (!queue_cnt)
+		return 0;
+	listarr = kmalloc(sizeof(struct list_head) * queue_cnt, GFP_KERNEL);
+	if (!listarr)
+		return -ENOMEM;
+	team->qom_lists = listarr;
+	for (i = 0; i < queue_cnt; i++)
+		INIT_LIST_HEAD(listarr++);
+	return 0;
+}
+
+static void team_queue_override_fini(struct team *team)
+{
+	kfree(team->qom_lists);
+}
+
+static struct list_head *__team_get_qom_list(struct team *team, u16 queue_id)
+{
+	return &team->qom_lists[queue_id - 1];
+}
+
+/*
+ * note: already called with rcu_read_lock
+ */
+static bool team_queue_override_transmit(struct team *team, struct sk_buff *skb)
+{
+	struct list_head *qom_list;
+	struct team_port *port;
+
+	if (!team->queue_override_enabled || !skb->queue_mapping)
+		return false;
+	qom_list = __team_get_qom_list(team, skb->queue_mapping);
+	list_for_each_entry_rcu(port, qom_list, qom_list) {
+		if (!team_dev_queue_xmit(team, port, skb))
+			return true;
+	}
+	return false;
+}
+
+static void __team_queue_override_port_del(struct team *team,
+					   struct team_port *port)
+{
+	list_del_rcu(&port->qom_list);
+	synchronize_rcu();
+	INIT_LIST_HEAD(&port->qom_list);
+}
+
+static bool team_queue_override_port_has_gt_prio_than(struct team_port *port,
+						      struct team_port *cur)
+{
+	if (port->priority < cur->priority)
+		return true;
+	if (port->priority > cur->priority)
+		return false;
+	if (port->index < cur->index)
+		return true;
+	return false;
+}
+
+static void __team_queue_override_port_add(struct team *team,
+					   struct team_port *port)
+{
+	struct team_port *cur;
+	struct list_head *qom_list;
+	struct list_head *node;
+
+	if (!port->queue_id || !team_port_enabled(port))
+		return;
+
+	qom_list = __team_get_qom_list(team, port->queue_id);
+	node = qom_list;
+	list_for_each_entry(cur, qom_list, qom_list) {
+		if (team_queue_override_port_has_gt_prio_than(port, cur))
+			break;
+		node = &cur->qom_list;
+	}
+	list_add_tail_rcu(&port->qom_list, node);
+}
+
+static void __team_queue_override_enabled_check(struct team *team)
+{
+	struct team_port *port;
+	bool enabled = false;
+
+	list_for_each_entry(port, &team->port_list, list) {
+		if (!list_empty(&port->qom_list)) {
+			enabled = true;
+			break;
+		}
+	}
+	if (enabled == team->queue_override_enabled)
+		return;
+	netdev_dbg(team->dev, "%s queue override\n",
+		   enabled ? "Enabling" : "Disabling");
+	team->queue_override_enabled = enabled;
+}
+
+static void team_queue_override_port_refresh(struct team *team,
+					     struct team_port *port)
+{
+	__team_queue_override_port_del(team, port);
+	__team_queue_override_port_add(team, port);
+	__team_queue_override_enabled_check(team);
+}
+
+
 /****************
  * Port handling
  ****************/
@@ -688,6 +804,7 @@ static void team_port_enable(struct team *team,
 	hlist_add_head_rcu(&port->hlist,
 			   team_port_index_hash(team, port->index));
 	team_adjust_ops(team);
+	team_queue_override_port_refresh(team, port);
 	if (team->ops.port_enabled)
 		team->ops.port_enabled(team, port);
 }
@@ -716,6 +833,7 @@ static void team_port_disable(struct team *team,
 	hlist_del_rcu(&port->hlist);
 	__reconstruct_port_hlist(team, port->index);
 	port->index = -1;
+	team_queue_override_port_refresh(team, port);
 	__team_adjust_ops(team, team->en_port_count - 1);
 	/*
 	 * Wait until readers see adjusted ops. This ensures that
@@ -881,6 +999,7 @@ static int team_port_add(struct team *team, struct net_device *port_dev)
 
 	port->dev = port_dev;
 	port->team = team;
+	INIT_LIST_HEAD(&port->qom_list);
 
 	port->orig.mtu = port_dev->mtu;
 	err = dev_set_mtu(port_dev, dev->mtu);
@@ -1107,9 +1226,34 @@ static int team_priority_option_set(struct team *team,
 	struct team_port *port = ctx->info->port;
 
 	port->priority = ctx->data.s32_val;
+	team_queue_override_port_refresh(team, port);
 	return 0;
 }
 
+static int team_queue_id_option_get(struct team *team,
+				    struct team_gsetter_ctx *ctx)
+{
+	struct team_port *port = ctx->info->port;
+
+	ctx->data.u32_val = port->queue_id;
+	return 0;
+}
+
+static int team_queue_id_option_set(struct team *team,
+				    struct team_gsetter_ctx *ctx)
+{
+	struct team_port *port = ctx->info->port;
+
+	if (port->queue_id == ctx->data.u32_val)
+		return 0;
+	if (ctx->data.u32_val >= team->dev->real_num_tx_queues)
+		return -EINVAL;
+	port->queue_id = ctx->data.u32_val;
+	team_queue_override_port_refresh(team, port);
+	return 0;
+}
+
+
 static const struct team_option team_options[] = {
 	{
 		.name = "mode",
@@ -1145,6 +1289,13 @@ static const struct team_option team_options[] = {
 		.getter = team_priority_option_get,
 		.setter = team_priority_option_set,
 	},
+	{
+		.name = "queue_id",
+		.type = TEAM_OPTION_TYPE_U32,
+		.per_port = true,
+		.getter = team_queue_id_option_get,
+		.setter = team_queue_id_option_set,
+	},
 };
 
 static struct lock_class_key team_netdev_xmit_lock_key;
@@ -1180,6 +1331,9 @@ static int team_init(struct net_device *dev)
 	for (i = 0; i < TEAM_PORT_HASHENTRIES; i++)
 		INIT_HLIST_HEAD(&team->en_port_hlist[i]);
 	INIT_LIST_HEAD(&team->port_list);
+	err = team_queue_override_init(team);
+	if (err)
+		goto err_team_queue_override_init;
 
 	team_adjust_ops(team);
 
@@ -1195,6 +1349,8 @@ static int team_init(struct net_device *dev)
 	return 0;
 
 err_options_register:
+	team_queue_override_fini(team);
+err_team_queue_override_init:
 	free_percpu(team->pcpu_stats);
 
 	return err;
@@ -1212,6 +1368,7 @@ static void team_uninit(struct net_device *dev)
 
 	__team_change_mode(team, NULL); /* cleanup */
 	__team_options_unregister(team, team_options, ARRAY_SIZE(team_options));
+	team_queue_override_fini(team);
 	mutex_unlock(&team->lock);
 }
 
@@ -1241,10 +1398,12 @@ static int team_close(struct net_device *dev)
 static netdev_tx_t team_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct team *team = netdev_priv(dev);
-	bool tx_success = false;
+	bool tx_success;
 	unsigned int len = skb->len;
 
-	tx_success = team->ops.transmit(team, skb);
+	tx_success = team_queue_override_transmit(team, skb);
+	if (!tx_success)
+		tx_success = team->ops.transmit(team, skb);
 	if (tx_success) {
 		struct team_pcpu_stats *pcpu_stats;
 
diff --git a/include/linux/if_team.h b/include/linux/if_team.h
index 06495b3e7a99..33fcc20b5881 100644
--- a/include/linux/if_team.h
+++ b/include/linux/if_team.h
@@ -68,6 +68,8 @@ struct team_port {
 #endif
 
 	s32 priority; /* lower number ~ higher priority */
+	u16 queue_id;
+	struct list_head qom_list; /* node in queue override mapping list */
 	long mode_priv[0];
 };
 
@@ -200,6 +202,8 @@ struct team {
 
 	const struct team_mode *mode;
 	struct team_mode_ops ops;
+	bool queue_override_enabled;
+	struct list_head *qom_lists; /* array of queue override mapping lists */
 	long mode_priv[TEAM_MODE_PRIV_LONGS];
 };
 
-- 
cgit v1.2.3


From 47061bc440b90a16d856fb0dba1cffc36427efa4 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Fri, 3 Aug 2012 20:54:15 +0000
Subject: net: skb_share_check() should use consume_skb()

In order to avoid false drop_monitor indications, we should
call consume_skb() if skb_clone() was successful.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 7632c87da2c9..b33a3a1f205e 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -846,13 +846,16 @@ static inline int skb_shared(const struct sk_buff *skb)
  *
  *	NULL is returned on a memory allocation failure.
  */
-static inline struct sk_buff *skb_share_check(struct sk_buff *skb,
-					      gfp_t pri)
+static inline struct sk_buff *skb_share_check(struct sk_buff *skb, gfp_t pri)
 {
 	might_sleep_if(pri & __GFP_WAIT);
 	if (skb_shared(skb)) {
 		struct sk_buff *nskb = skb_clone(skb, pri);
-		kfree_skb(skb);
+
+		if (likely(nskb))
+			consume_skb(skb);
+		else
+			kfree_skb(skb);
 		skb = nskb;
 	}
 	return skb;
-- 
cgit v1.2.3


From 9eb43e765368f835d92c93844ebce30da7efeb84 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Fri, 3 Aug 2012 21:27:25 +0000
Subject: ipv4: Introduce IN_DEV_NET_ROUTE_LOCALNET

performance profiles show a high cost in the IN_DEV_ROUTE_LOCALNET()
call done in ip_route_input_slow(), because of multiple dereferences,
even if cache lines are clean and available in cpu caches.

Since we already have the 'net' pointer, introduce
IN_DEV_NET_ROUTE_LOCALNET() macro avoiding two dereferences
(dev_net(in_dev->dev))

Also change the tests to use IN_DEV_NET_ROUTE_LOCALNET() only if saddr
or/and daddr are loopback addresse.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/inetdevice.h | 11 +++++++++--
 net/ipv4/route.c           | 11 +++++++----
 2 files changed, 16 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h
index 67f9ddacb70c..d032780d0ce5 100644
--- a/include/linux/inetdevice.h
+++ b/include/linux/inetdevice.h
@@ -104,9 +104,14 @@ static inline void ipv4_devconf_setall(struct in_device *in_dev)
 #define IN_DEV_ANDCONF(in_dev, attr) \
 	(IPV4_DEVCONF_ALL(dev_net(in_dev->dev), attr) && \
 	 IN_DEV_CONF_GET((in_dev), attr))
-#define IN_DEV_ORCONF(in_dev, attr) \
-	(IPV4_DEVCONF_ALL(dev_net(in_dev->dev), attr) || \
+
+#define IN_DEV_NET_ORCONF(in_dev, net, attr) \
+	(IPV4_DEVCONF_ALL(net, attr) || \
 	 IN_DEV_CONF_GET((in_dev), attr))
+
+#define IN_DEV_ORCONF(in_dev, attr) \
+	IN_DEV_NET_ORCONF(in_dev, dev_net(in_dev->dev), attr)
+
 #define IN_DEV_MAXCONF(in_dev, attr) \
 	(max(IPV4_DEVCONF_ALL(dev_net(in_dev->dev), attr), \
 	     IN_DEV_CONF_GET((in_dev), attr)))
@@ -133,6 +138,8 @@ static inline void ipv4_devconf_setall(struct in_device *in_dev)
 					IN_DEV_ORCONF((in_dev), \
 						      PROMOTE_SECONDARIES)
 #define IN_DEV_ROUTE_LOCALNET(in_dev)	IN_DEV_ORCONF(in_dev, ROUTE_LOCALNET)
+#define IN_DEV_NET_ROUTE_LOCALNET(in_dev, net)	\
+	IN_DEV_NET_ORCONF(in_dev, net, ROUTE_LOCALNET)
 
 #define IN_DEV_RX_REDIRECTS(in_dev) \
 	((IN_DEV_FORWARD(in_dev) && \
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index e4ba974f143c..21ad369014c0 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1587,11 +1587,14 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 	if (ipv4_is_zeronet(daddr))
 		goto martian_destination;
 
-	if (likely(!IN_DEV_ROUTE_LOCALNET(in_dev))) {
-		if (ipv4_is_loopback(daddr))
+	/* Following code try to avoid calling IN_DEV_NET_ROUTE_LOCALNET(),
+	 * and call it once if daddr or/and saddr are loopback addresses
+	 */
+	if (ipv4_is_loopback(daddr)) {
+		if (!IN_DEV_NET_ROUTE_LOCALNET(in_dev, net))
 			goto martian_destination;
-
-		if (ipv4_is_loopback(saddr))
+	} else if (ipv4_is_loopback(saddr)) {
+		if (!IN_DEV_NET_ROUTE_LOCALNET(in_dev, net))
 			goto martian_source;
 	}
 
-- 
cgit v1.2.3


From 3f1732462c0e45ac9b0c09035751d7b2c1b89cc0 Mon Sep 17 00:00:00 2001
From: Andre Guedes <andre.guedes@openbossa.org>
Date: Thu, 19 Jul 2012 11:46:13 -0300
Subject: Bluetooth: Remove missing code

This patch removes the struct adv_entry since it is not used anymore.
This struct should have been removed in commit 479453d (Bluetooth:
Remove advertising cache).

Signed-off-by: Andre Guedes <andre.guedes@openbossa.org>
Signed-off-by: Gustavo Padovan <gustavo.padovan@collabora.co.uk>
---
 include/net/bluetooth/hci_core.h | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'include')

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 475b8c04ba52..41b26648e79e 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -115,12 +115,6 @@ struct oob_data {
 	u8 randomizer[16];
 };
 
-struct adv_entry {
-	struct list_head list;
-	bdaddr_t bdaddr;
-	u8 bdaddr_type;
-};
-
 struct le_scan_params {
 	u8 type;
 	u16 interval;
-- 
cgit v1.2.3


From bb4b2a9ae38ef3bac69627f35e4f916752631fd1 Mon Sep 17 00:00:00 2001
From: Andrei Emeltchenko <andrei.emeltchenko@intel.com>
Date: Thu, 19 Jul 2012 17:03:40 +0300
Subject: Bluetooth: mgmt: Managing only BR/EDR HCI controllers

Add check that HCI controller is BR/EDR. AMP controller shall not be
managed by mgmt interface and consequently user space.

Signed-off-by: Andrei Emeltchenko <andrei.emeltchenko@intel.com>
Acked-by: Johan Hedberg <johan.hedberg@intel.com>
Signed-off-by: Gustavo Padovan <gustavo.padovan@collabora.co.uk>
---
 include/net/bluetooth/hci_core.h |  2 +-
 net/bluetooth/hci_core.c         |  6 ++++--
 net/bluetooth/mgmt.c             | 20 ++++++++++++++++++--
 3 files changed, 23 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 41b26648e79e..5c7a5f819fcf 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -1050,7 +1050,7 @@ int mgmt_discovering(struct hci_dev *hdev, u8 discovering);
 int mgmt_interleaved_discovery(struct hci_dev *hdev);
 int mgmt_device_blocked(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 type);
 int mgmt_device_unblocked(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 type);
-
+bool mgmt_valid_hdev(struct hci_dev *hdev);
 int mgmt_new_ltk(struct hci_dev *hdev, struct smp_ltk *key, u8 persistent);
 
 /* HCI info for socket */
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index d4de5db18d5a..fa974a19d365 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -696,7 +696,8 @@ int hci_dev_open(__u16 dev)
 		hci_dev_hold(hdev);
 		set_bit(HCI_UP, &hdev->flags);
 		hci_notify(hdev, HCI_DEV_UP);
-		if (!test_bit(HCI_SETUP, &hdev->dev_flags)) {
+		if (!test_bit(HCI_SETUP, &hdev->dev_flags) &&
+		    mgmt_valid_hdev(hdev)) {
 			hci_dev_lock(hdev);
 			mgmt_powered(hdev, 1);
 			hci_dev_unlock(hdev);
@@ -797,7 +798,8 @@ static int hci_dev_do_close(struct hci_dev *hdev)
 	 * and no tasks are scheduled. */
 	hdev->close(hdev);
 
-	if (!test_and_clear_bit(HCI_AUTO_OFF, &hdev->dev_flags)) {
+	if (!test_and_clear_bit(HCI_AUTO_OFF, &hdev->dev_flags) &&
+	    mgmt_valid_hdev(hdev)) {
 		hci_dev_lock(hdev);
 		mgmt_powered(hdev, 0);
 		hci_dev_unlock(hdev);
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index ad6613d17ca6..2a0f695e33d4 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -193,6 +193,11 @@ static u8 mgmt_status_table[] = {
 	MGMT_STATUS_CONNECT_FAILED,	/* MAC Connection Failed */
 };
 
+bool mgmt_valid_hdev(struct hci_dev *hdev)
+{
+	return hdev->dev_type == HCI_BREDR;
+}
+
 static u8 mgmt_status(u8 hci_status)
 {
 	if (hci_status < ARRAY_SIZE(mgmt_status_table))
@@ -317,7 +322,6 @@ static int read_index_list(struct sock *sk, struct hci_dev *hdev, void *data,
 			   u16 data_len)
 {
 	struct mgmt_rp_read_index_list *rp;
-	struct list_head *p;
 	struct hci_dev *d;
 	size_t rp_len;
 	u16 count;
@@ -328,7 +332,10 @@ static int read_index_list(struct sock *sk, struct hci_dev *hdev, void *data,
 	read_lock(&hci_dev_list_lock);
 
 	count = 0;
-	list_for_each(p, &hci_dev_list) {
+	list_for_each_entry(d, &hci_dev_list, list) {
+		if (!mgmt_valid_hdev(d))
+			continue;
+
 		count++;
 	}
 
@@ -346,6 +353,9 @@ static int read_index_list(struct sock *sk, struct hci_dev *hdev, void *data,
 		if (test_bit(HCI_SETUP, &d->dev_flags))
 			continue;
 
+		if (!mgmt_valid_hdev(d))
+			continue;
+
 		rp->index[i++] = cpu_to_le16(d->id);
 		BT_DBG("Added hci%u", d->id);
 	}
@@ -2820,6 +2830,9 @@ static void cmd_status_rsp(struct pending_cmd *cmd, void *data)
 
 int mgmt_index_added(struct hci_dev *hdev)
 {
+	if (!mgmt_valid_hdev(hdev))
+		return -ENOTSUPP;
+
 	return mgmt_event(MGMT_EV_INDEX_ADDED, hdev, NULL, 0, NULL);
 }
 
@@ -2827,6 +2840,9 @@ int mgmt_index_removed(struct hci_dev *hdev)
 {
 	u8 status = MGMT_STATUS_INVALID_INDEX;
 
+	if (!mgmt_valid_hdev(hdev))
+		return -ENOTSUPP;
+
 	mgmt_pending_foreach(0, hdev, cmd_status_rsp, &status);
 
 	return mgmt_event(MGMT_EV_INDEX_REMOVED, hdev, NULL, 0, NULL);
-- 
cgit v1.2.3


From 71becf0cea91380200ab9460e5126aeff5d7420b Mon Sep 17 00:00:00 2001
From: Andrei Emeltchenko <andrei.emeltchenko@intel.com>
Date: Thu, 19 Jul 2012 17:03:42 +0300
Subject: Bluetooth: debug: Fix printing refcnt for hci_conn

Use the same style for refcnt printing through all Bluetooth code
taking the reference the l2cap_chan refcnt printing.

Signed-off-by: Andrei Emeltchenko <andrei.emeltchenko@intel.com>
Signed-off-by: Gustavo Padovan <gustavo.padovan@collabora.co.uk>
---
 include/net/bluetooth/hci_core.h | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 5c7a5f819fcf..03b8b6526400 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -581,8 +581,7 @@ void hci_conn_put_device(struct hci_conn *conn);
 
 static inline void hci_conn_hold(struct hci_conn *conn)
 {
-	BT_DBG("hcon %p refcnt %d -> %d", conn, atomic_read(&conn->refcnt),
-	       atomic_read(&conn->refcnt) + 1);
+	BT_DBG("hcon %p orig refcnt %d", conn, atomic_read(&conn->refcnt));
 
 	atomic_inc(&conn->refcnt);
 	cancel_delayed_work(&conn->disc_work);
@@ -590,8 +589,7 @@ static inline void hci_conn_hold(struct hci_conn *conn)
 
 static inline void hci_conn_put(struct hci_conn *conn)
 {
-	BT_DBG("hcon %p refcnt %d -> %d", conn, atomic_read(&conn->refcnt),
-	       atomic_read(&conn->refcnt) - 1);
+	BT_DBG("hcon %p orig refcnt %d", conn, atomic_read(&conn->refcnt));
 
 	if (atomic_dec_and_test(&conn->refcnt)) {
 		unsigned long timeo;
-- 
cgit v1.2.3


From b93a68295f3a2b1b66d235ce8f9f5a97553f0d0e Mon Sep 17 00:00:00 2001
From: Andrei Emeltchenko <andrei.emeltchenko@intel.com>
Date: Thu, 19 Jul 2012 17:03:44 +0300
Subject: Bluetooth: trivial: Fix mixing spaces and tabs in smp

Change spaces to tabs in smp code

Signed-off-by: Andrei Emeltchenko <andrei.emeltchenko@intel.com>
Signed-off-by: Gustavo Padovan <gustavo.padovan@collabora.co.uk>
---
 include/net/bluetooth/smp.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/net/bluetooth/smp.h b/include/net/bluetooth/smp.h
index ca356a734920..50993a531d45 100644
--- a/include/net/bluetooth/smp.h
+++ b/include/net/bluetooth/smp.h
@@ -108,8 +108,8 @@ struct smp_cmd_security_req {
 #define SMP_CONFIRM_FAILED		0x04
 #define SMP_PAIRING_NOTSUPP		0x05
 #define SMP_ENC_KEY_SIZE		0x06
-#define SMP_CMD_NOTSUPP		0x07
-#define SMP_UNSPECIFIED		0x08
+#define SMP_CMD_NOTSUPP			0x07
+#define SMP_UNSPECIFIED			0x08
 #define SMP_REPEATED_ATTEMPTS		0x09
 
 #define SMP_MIN_ENC_KEY_SIZE		7
@@ -123,8 +123,8 @@ struct smp_chan {
 	struct l2cap_conn *conn;
 	u8		preq[7]; /* SMP Pairing Request */
 	u8		prsp[7]; /* SMP Pairing Response */
-	u8              prnd[16]; /* SMP Pairing Random (local) */
-	u8              rrnd[16]; /* SMP Pairing Random (remote) */
+	u8		prnd[16]; /* SMP Pairing Random (local) */
+	u8		rrnd[16]; /* SMP Pairing Random (remote) */
 	u8		pcnf[16]; /* SMP Pairing Confirm */
 	u8		tk[16]; /* SMP Temporary Key */
 	u8		enc_key_size;
-- 
cgit v1.2.3


From ab846ec4eaea1156148841b194df808ad745bbe2 Mon Sep 17 00:00:00 2001
From: Andrei Emeltchenko <andrei.emeltchenko@intel.com>
Date: Thu, 19 Jul 2012 17:03:45 +0300
Subject: Bluetooth: Define AMP controller statuses

AMP status codes copied from Bluez patch sent by Peter Krystad
<pkrystad@codeaurora.org>.

Signed-off-by: Andrei Emeltchenko <andrei.emeltchenko@intel.com>
Signed-off-by: Gustavo Padovan <gustavo.padovan@collabora.co.uk>
---
 include/net/bluetooth/hci.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include')

diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index ccd723e0f783..7f1955662455 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -62,6 +62,15 @@
 /* First BR/EDR Controller shall have ID = 0 */
 #define HCI_BREDR_ID	0
 
+/* AMP controller status */
+#define AMP_CTRL_POWERED_DOWN			0x00
+#define AMP_CTRL_BLUETOOTH_ONLY			0x01
+#define AMP_CTRL_NO_CAPACITY			0x02
+#define AMP_CTRL_LOW_CAPACITY			0x03
+#define AMP_CTRL_MEDIUM_CAPACITY		0x04
+#define AMP_CTRL_HIGH_CAPACITY			0x05
+#define AMP_CTRL_FULL_CAPACITY			0x06
+
 /* HCI device quirks */
 enum {
 	HCI_QUIRK_RESET_ON_CLOSE,
-- 
cgit v1.2.3


From 9e66463127ff7238020c3c4e7f84dfbc23e5c2b5 Mon Sep 17 00:00:00 2001
From: Andrei Emeltchenko <andrei.emeltchenko@intel.com>
Date: Tue, 24 Jul 2012 16:06:15 +0300
Subject: Bluetooth: Make connect / disconnect cfm functions return void

Return values are never used because callers hci_proto_connect_cfm
and hci_proto_disconn_cfm return void.

Signed-off-by: Andrei Emeltchenko <andrei.emeltchenko@intel.com>
Signed-off-by: Gustavo Padovan <gustavo.padovan@collabora.co.uk>
---
 include/net/bluetooth/hci_core.h | 8 ++++----
 net/bluetooth/l2cap_core.c       | 6 ++----
 net/bluetooth/sco.c              | 7 ++-----
 3 files changed, 8 insertions(+), 13 deletions(-)

(limited to 'include')

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 03b8b6526400..41d943926d2c 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -350,16 +350,16 @@ extern rwlock_t hci_cb_list_lock;
 
 /* ----- HCI interface to upper protocols ----- */
 extern int l2cap_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr);
-extern int l2cap_connect_cfm(struct hci_conn *hcon, u8 status);
+extern void l2cap_connect_cfm(struct hci_conn *hcon, u8 status);
 extern int l2cap_disconn_ind(struct hci_conn *hcon);
-extern int l2cap_disconn_cfm(struct hci_conn *hcon, u8 reason);
+extern void l2cap_disconn_cfm(struct hci_conn *hcon, u8 reason);
 extern int l2cap_security_cfm(struct hci_conn *hcon, u8 status, u8 encrypt);
 extern int l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb,
 			      u16 flags);
 
 extern int sco_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr);
-extern int sco_connect_cfm(struct hci_conn *hcon, __u8 status);
-extern int sco_disconn_cfm(struct hci_conn *hcon, __u8 reason);
+extern void sco_connect_cfm(struct hci_conn *hcon, __u8 status);
+extern void sco_disconn_cfm(struct hci_conn *hcon, __u8 reason);
 extern int sco_recv_scodata(struct hci_conn *hcon, struct sk_buff *skb);
 
 /* ----- Inquiry cache ----- */
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index 64d88a603f0a..8391e0575494 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -5329,7 +5329,7 @@ int l2cap_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr)
 	return exact ? lm1 : lm2;
 }
 
-int l2cap_connect_cfm(struct hci_conn *hcon, u8 status)
+void l2cap_connect_cfm(struct hci_conn *hcon, u8 status)
 {
 	struct l2cap_conn *conn;
 
@@ -5342,7 +5342,6 @@ int l2cap_connect_cfm(struct hci_conn *hcon, u8 status)
 	} else
 		l2cap_conn_del(hcon, bt_to_errno(status));
 
-	return 0;
 }
 
 int l2cap_disconn_ind(struct hci_conn *hcon)
@@ -5356,12 +5355,11 @@ int l2cap_disconn_ind(struct hci_conn *hcon)
 	return conn->disc_reason;
 }
 
-int l2cap_disconn_cfm(struct hci_conn *hcon, u8 reason)
+void l2cap_disconn_cfm(struct hci_conn *hcon, u8 reason)
 {
 	BT_DBG("hcon %p reason %d", hcon, reason);
 
 	l2cap_conn_del(hcon, bt_to_errno(reason));
-	return 0;
 }
 
 static inline void l2cap_check_encryption(struct l2cap_chan *chan, u8 encrypt)
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index 40bbe25dcff7..0ef5a78a889f 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -913,7 +913,7 @@ int sco_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr)
 	return lm;
 }
 
-int sco_connect_cfm(struct hci_conn *hcon, __u8 status)
+void sco_connect_cfm(struct hci_conn *hcon, __u8 status)
 {
 	BT_DBG("hcon %p bdaddr %s status %d", hcon, batostr(&hcon->dst), status);
 	if (!status) {
@@ -924,16 +924,13 @@ int sco_connect_cfm(struct hci_conn *hcon, __u8 status)
 			sco_conn_ready(conn);
 	} else
 		sco_conn_del(hcon, bt_to_errno(status));
-
-	return 0;
 }
 
-int sco_disconn_cfm(struct hci_conn *hcon, __u8 reason)
+void sco_disconn_cfm(struct hci_conn *hcon, __u8 reason)
 {
 	BT_DBG("hcon %p reason %d", hcon, reason);
 
 	sco_conn_del(hcon, bt_to_errno(reason));
-	return 0;
 }
 
 int sco_recv_scodata(struct hci_conn *hcon, struct sk_buff *skb)
-- 
cgit v1.2.3


From 3064837289259843310b266a9422aca5f5b4b9c7 Mon Sep 17 00:00:00 2001
From: Jaganath Kanakkassery <jaganath.k@samsung.com>
Date: Fri, 13 Jul 2012 18:17:54 +0530
Subject: Bluetooth: Move l2cap_chan_hold/put to l2cap_core.c

Refactor the code in order to use the l2cap_chan_destroy()
from l2cap_chan_put() under the refcnt protection.

Signed-off-by: Jaganath Kanakkassery <jaganath.k@samsung.com>
Signed-off-by: Syam Sidhardhan <s.syam@samsung.com>
Reviewed-by: Andrei Emeltchenko <andrei.emeltchenko@intel.com>
Signed-off-by: Gustavo Padovan <gustavo.padovan@collabora.co.uk>
---
 include/net/bluetooth/l2cap.h | 16 ++--------------
 net/bluetooth/l2cap_core.c    | 15 +++++++++++++++
 2 files changed, 17 insertions(+), 14 deletions(-)

(limited to 'include')

diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h
index a7679f8913d2..a1eb6786ce54 100644
--- a/include/net/bluetooth/l2cap.h
+++ b/include/net/bluetooth/l2cap.h
@@ -671,20 +671,8 @@ enum {
 	L2CAP_EV_RECV_FRAME,
 };
 
-static inline void l2cap_chan_hold(struct l2cap_chan *c)
-{
-	BT_DBG("chan %p orig refcnt %d", c, atomic_read(&c->refcnt));
-
-	atomic_inc(&c->refcnt);
-}
-
-static inline void l2cap_chan_put(struct l2cap_chan *c)
-{
-	BT_DBG("chan %p orig refcnt %d", c, atomic_read(&c->refcnt));
-
-	if (atomic_dec_and_test(&c->refcnt))
-		kfree(c);
-}
+void l2cap_chan_hold(struct l2cap_chan *c);
+void l2cap_chan_put(struct l2cap_chan *c);
 
 static inline void l2cap_chan_lock(struct l2cap_chan *chan)
 {
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index 8391e0575494..79923d8bbe97 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -425,6 +425,21 @@ void l2cap_chan_destroy(struct l2cap_chan *chan)
 	l2cap_chan_put(chan);
 }
 
+void l2cap_chan_hold(struct l2cap_chan *c)
+{
+	BT_DBG("chan %p orig refcnt %d", c, atomic_read(&c->refcnt));
+
+	atomic_inc(&c->refcnt);
+}
+
+void l2cap_chan_put(struct l2cap_chan *c)
+{
+	BT_DBG("chan %p orig refcnt %d", c, atomic_read(&c->refcnt));
+
+	if (atomic_dec_and_test(&c->refcnt))
+		kfree(c);
+}
+
 void l2cap_chan_set_defaults(struct l2cap_chan *chan)
 {
 	chan->fcs  = L2CAP_FCS_CRC16;
-- 
cgit v1.2.3


From 4af66c691f4e5c2db9bb00793669a548e9db1974 Mon Sep 17 00:00:00 2001
From: Jaganath Kanakkassery <jaganath.k@samsung.com>
Date: Fri, 13 Jul 2012 18:17:55 +0530
Subject: Bluetooth: Free the l2cap channel list only when refcount is zero

Move the l2cap channel list chan->global_l under the refcnt
protection and free it based on the refcnt.

Signed-off-by: Jaganath Kanakkassery <jaganath.k@samsung.com>
Signed-off-by: Syam Sidhardhan <s.syam@samsung.com>
Reviewed-by: Andrei Emeltchenko <andrei.emeltchenko@intel.com>
Signed-off-by: Gustavo Padovan <gustavo.padovan@collabora.co.uk>
---
 include/net/bluetooth/l2cap.h | 1 -
 net/bluetooth/a2mp.c          | 2 +-
 net/bluetooth/l2cap_core.c    | 8 +++++---
 net/bluetooth/l2cap_sock.c    | 2 +-
 4 files changed, 7 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h
index a1eb6786ce54..d206296137e2 100644
--- a/include/net/bluetooth/l2cap.h
+++ b/include/net/bluetooth/l2cap.h
@@ -759,7 +759,6 @@ int l2cap_add_scid(struct l2cap_chan *chan,  __u16 scid);
 
 struct l2cap_chan *l2cap_chan_create(void);
 void l2cap_chan_close(struct l2cap_chan *chan, int reason);
-void l2cap_chan_destroy(struct l2cap_chan *chan);
 int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid,
 		       bdaddr_t *dst, u8 dst_type);
 int l2cap_chan_send(struct l2cap_chan *chan, struct msghdr *msg, size_t len,
diff --git a/net/bluetooth/a2mp.c b/net/bluetooth/a2mp.c
index 79af661a58dd..0760d1fed6f0 100644
--- a/net/bluetooth/a2mp.c
+++ b/net/bluetooth/a2mp.c
@@ -416,7 +416,7 @@ static int a2mp_chan_recv_cb(struct l2cap_chan *chan, struct sk_buff *skb)
 
 static void a2mp_chan_close_cb(struct l2cap_chan *chan)
 {
-	l2cap_chan_destroy(chan);
+	l2cap_chan_put(chan);
 }
 
 static void a2mp_chan_state_change_cb(struct l2cap_chan *chan, int state)
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index 79923d8bbe97..9f8b29ef5b68 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -416,13 +416,15 @@ struct l2cap_chan *l2cap_chan_create(void)
 	return chan;
 }
 
-void l2cap_chan_destroy(struct l2cap_chan *chan)
+static void l2cap_chan_destroy(struct l2cap_chan *chan)
 {
+	BT_DBG("chan %p", chan);
+
 	write_lock(&chan_list_lock);
 	list_del(&chan->global_l);
 	write_unlock(&chan_list_lock);
 
-	l2cap_chan_put(chan);
+	kfree(chan);
 }
 
 void l2cap_chan_hold(struct l2cap_chan *c)
@@ -437,7 +439,7 @@ void l2cap_chan_put(struct l2cap_chan *c)
 	BT_DBG("chan %p orig refcnt %d", c, atomic_read(&c->refcnt));
 
 	if (atomic_dec_and_test(&c->refcnt))
-		kfree(c);
+		l2cap_chan_destroy(c);
 }
 
 void l2cap_chan_set_defaults(struct l2cap_chan *chan)
diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c
index a4bb27e8427e..79350d10087c 100644
--- a/net/bluetooth/l2cap_sock.c
+++ b/net/bluetooth/l2cap_sock.c
@@ -823,7 +823,7 @@ static void l2cap_sock_kill(struct sock *sk)
 
 	/* Kill poor orphan */
 
-	l2cap_chan_destroy(l2cap_pi(sk)->chan);
+	l2cap_chan_put(l2cap_pi(sk)->chan);
 	sock_set_flag(sk, SOCK_DEAD);
 	sock_put(sk);
 }
-- 
cgit v1.2.3


From 256a06c8a85df676e80263af349daad1283e529e Mon Sep 17 00:00:00 2001
From: Masatake YAMATO <yamato@redhat.com>
Date: Thu, 26 Jul 2012 01:26:32 +0900
Subject: Bluetooth: /proc/net/ entries for bluetooth protocols

lsof command can tell the type of socket processes are using.
Internal lsof uses inode numbers on socket fs to resolve the type of
sockets. Files under /proc/net/, such as tcp, udp, unix, etc provides
such inode information.

Unfortunately bluetooth related protocols don't provide such inode
information. This patch series introduces /proc/net files for the protocols.

This patch against af_bluetooth.c provides facility to the implementation
of protocols. This patch extends bt_sock_list and introduces two exported
function bt_procfs_init, bt_procfs_cleanup.

The type bt_sock_list is already used in some of implementation of
protocols. bt_procfs_init prepare seq_operations which converts
protocol own bt_sock_list data to protocol own proc entry when the
entry is accessed.

What I, lsof user, need is just inode number of bluetooth
socket. However, people may want more information. The bt_procfs_init
takes a function pointer for customizing the show handler of
seq_operations.

In v4 patch, __acquires and __releases attributes are added to suppress
sparse warning. Suggested by Andrei Emeltchenko.

In v5 patch, linux/proc_fs.h is included to use PDE. Build error is
reported by Fengguang Wu.

Signed-off-by: Masatake YAMATO <yamato@redhat.com>
Signed-off-by: Gustavo Padovan <gustavo.padovan@collabora.co.uk>
---
 include/net/bluetooth/bluetooth.h |  10 +++
 net/bluetooth/af_bluetooth.c      | 141 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 151 insertions(+)

(limited to 'include')

diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h
index 565d4bee1e49..ede036977ae8 100644
--- a/include/net/bluetooth/bluetooth.h
+++ b/include/net/bluetooth/bluetooth.h
@@ -27,6 +27,7 @@
 
 #include <linux/poll.h>
 #include <net/sock.h>
+#include <linux/seq_file.h>
 
 #ifndef AF_BLUETOOTH
 #define AF_BLUETOOTH	31
@@ -202,6 +203,10 @@ enum {
 struct bt_sock_list {
 	struct hlist_head head;
 	rwlock_t          lock;
+#ifdef CONFIG_PROC_FS
+        struct file_operations   fops;
+        int (* custom_seq_show)(struct seq_file *, void *);
+#endif
 };
 
 int  bt_sock_register(int proto, const struct net_proto_family *ops);
@@ -292,6 +297,11 @@ extern void hci_sock_cleanup(void);
 extern int bt_sysfs_init(void);
 extern void bt_sysfs_cleanup(void);
 
+extern int  bt_procfs_init(struct module* module, struct net *net, const char *name,
+			   struct bt_sock_list* sk_list,
+			   int (* seq_show)(struct seq_file *, void *));
+extern void bt_procfs_cleanup(struct net *net, const char *name);
+
 extern struct dentry *bt_debugfs;
 
 int l2cap_init(void);
diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c
index f7db5792ec64..58f9762b339a 100644
--- a/net/bluetooth/af_bluetooth.c
+++ b/net/bluetooth/af_bluetooth.c
@@ -28,6 +28,7 @@
 #include <asm/ioctls.h>
 
 #include <net/bluetooth/bluetooth.h>
+#include <linux/proc_fs.h>
 
 #define VERSION "2.16"
 
@@ -532,6 +533,146 @@ int bt_sock_wait_state(struct sock *sk, int state, unsigned long timeo)
 }
 EXPORT_SYMBOL(bt_sock_wait_state);
 
+#ifdef CONFIG_PROC_FS
+struct bt_seq_state {
+	struct bt_sock_list *l;
+};
+
+static void *bt_seq_start(struct seq_file *seq, loff_t *pos)
+	__acquires(seq->private->l->lock)
+{
+	struct bt_seq_state *s = seq->private;
+	struct bt_sock_list *l = s->l;
+
+	read_lock(&l->lock);
+	return seq_hlist_start_head(&l->head, *pos);
+}
+
+static void *bt_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	struct bt_seq_state *s = seq->private;
+	struct bt_sock_list *l = s->l;
+
+	return seq_hlist_next(v, &l->head, pos);
+}
+
+static void bt_seq_stop(struct seq_file *seq, void *v)
+	__releases(seq->private->l->lock)
+{
+	struct bt_seq_state *s = seq->private;
+	struct bt_sock_list *l = s->l;
+
+	read_unlock(&l->lock);
+}
+
+static int bt_seq_show(struct seq_file *seq, void *v)
+{
+	struct sock *sk;
+	struct bt_sock *bt;
+	struct bt_seq_state *s = seq->private;
+	struct bt_sock_list *l = s->l;
+	bdaddr_t src_baswapped, dst_baswapped;
+
+	if (v == SEQ_START_TOKEN) {
+		seq_puts(seq ,"sk               RefCnt Rmem   Wmem   User   Inode  Src Dst Parent");
+
+		if (l->custom_seq_show) {
+			seq_putc(seq, ' ');
+			l->custom_seq_show(seq, v);
+		}
+
+		seq_putc(seq, '\n');
+	} else {
+		sk = sk_entry(v);
+		bt = bt_sk(sk);
+		baswap(&src_baswapped, &bt->src);
+		baswap(&dst_baswapped, &bt->dst);
+
+		seq_printf(seq, "%pK %-6d %-6u %-6u %-6u %-6lu %pM %pM %-6lu",
+			   sk,
+			   atomic_read(&sk->sk_refcnt),
+			   sk_rmem_alloc_get(sk),
+			   sk_wmem_alloc_get(sk),
+			   sock_i_uid(sk),
+			   sock_i_ino(sk),
+			   &src_baswapped,
+			   &dst_baswapped,
+			   bt->parent? sock_i_ino(bt->parent): 0LU);
+
+		if (l->custom_seq_show) {
+			seq_putc(seq, ' ');
+			l->custom_seq_show(seq, v);
+		}
+
+		seq_putc(seq, '\n');
+	}
+	return 0;
+}
+
+static struct seq_operations bt_seq_ops = {
+	.start = bt_seq_start,
+	.next  = bt_seq_next,
+	.stop  = bt_seq_stop,
+	.show  = bt_seq_show,
+};
+
+static int bt_seq_open(struct inode *inode, struct file *file)
+{
+	struct bt_sock_list *sk_list;
+	struct bt_seq_state *s;
+
+	sk_list = PDE(inode)->data;
+	s = __seq_open_private(file, &bt_seq_ops,
+			       sizeof(struct bt_seq_state));
+	if (s == NULL)
+		return -ENOMEM;
+
+	s->l = sk_list;
+	return 0;
+}
+
+int bt_procfs_init(struct module* module, struct net *net, const char *name,
+		   struct bt_sock_list* sk_list,
+		   int (* seq_show)(struct seq_file *, void *))
+{
+	struct proc_dir_entry * pde;
+
+	sk_list->custom_seq_show = seq_show;
+
+	sk_list->fops.owner     = module;
+	sk_list->fops.open      = bt_seq_open;
+	sk_list->fops.read      = seq_read;
+	sk_list->fops.llseek    = seq_lseek;
+	sk_list->fops.release   = seq_release_private;
+
+	pde = proc_net_fops_create(net, name, 0, &sk_list->fops);
+	if (pde == NULL)
+		return -ENOMEM;
+
+	pde->data = sk_list;
+
+	return 0;
+}
+
+void bt_procfs_cleanup(struct net *net, const char *name)
+{
+	proc_net_remove(net, name);
+}
+#else
+int bt_procfs_init(struct module* module, struct net *net, const char *name,
+		   struct bt_sock_list* sk_list,
+		   int (* seq_show)(struct seq_file *, void *))
+{
+	return 0;
+}
+
+void bt_procfs_cleanup(struct net *net, const char *name)
+{
+}
+#endif
+EXPORT_SYMBOL(bt_procfs_init);
+EXPORT_SYMBOL(bt_procfs_cleanup);
+
 static struct net_proto_family bt_sock_family_ops = {
 	.owner	= THIS_MODULE,
 	.family	= PF_BLUETOOTH,
-- 
cgit v1.2.3


From b9b343d25484bbceaee454ab422daafb1c5eda96 Mon Sep 17 00:00:00 2001
From: Andre Guedes <andre.guedes@openbossa.org>
Date: Fri, 27 Jul 2012 15:10:11 -0300
Subject: Bluetooth: Fix hci_le_conn_complete_evt

We need to check the 'Role' parameter from the LE Connection
Complete Event in order to properly set 'out' and 'link_mode'
fields from hci_conn structure.

Signed-off-by: Andre Guedes <andre.guedes@openbossa.org>
Signed-off-by: Gustavo Padovan <gustavo.padovan@collabora.co.uk>
---
 include/net/bluetooth/hci.h | 2 ++
 net/bluetooth/hci_event.c   | 5 +++++
 2 files changed, 7 insertions(+)

(limited to 'include')

diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index 7f1955662455..23cf413e2acf 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -1304,6 +1304,8 @@ struct hci_ev_num_comp_blocks {
 } __packed;
 
 /* Low energy meta events */
+#define LE_CONN_ROLE_MASTER	0x00
+
 #define HCI_EV_LE_CONN_COMPLETE		0x01
 struct hci_ev_le_conn_complete {
 	__u8     status;
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index e89d7f24136b..8b13cccd50cd 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -3368,6 +3368,11 @@ static void hci_le_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
 		}
 
 		conn->dst_type = ev->bdaddr_type;
+
+		if (ev->role == LE_CONN_ROLE_MASTER) {
+			conn->out = true;
+			conn->link_mode |= HCI_LM_MASTER;
+		}
 	}
 
 	if (!test_and_set_bit(HCI_CONN_MGMT_CONNECTED, &conn->flags))
-- 
cgit v1.2.3


From d25398df59b561a26cb4000ceb4dea8a3ff94d22 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sat, 4 Aug 2012 20:26:13 +0000
Subject: net: avoid reloads in SNMP_UPD_PO_STATS

Avoid two instructions to reload dev->nd_net->mib.ip_statistics pointer,
unsing a temp variable, in ip_rcv(), ip_output() paths for example.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/snmp.h | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/net/snmp.h b/include/net/snmp.h
index 0147b901e79c..71596261fa99 100644
--- a/include/net/snmp.h
+++ b/include/net/snmp.h
@@ -154,13 +154,15 @@ struct linux_xfrm_mib {
  */
 #define SNMP_UPD_PO_STATS(mib, basefield, addend)	\
 	do { \
-		this_cpu_inc(mib[0]->mibs[basefield##PKTS]);		\
-		this_cpu_add(mib[0]->mibs[basefield##OCTETS], addend);	\
+		__typeof__(*mib[0]->mibs) *ptr = mib[0]->mibs;	\
+		this_cpu_inc(ptr[basefield##PKTS]);		\
+		this_cpu_add(ptr[basefield##OCTETS], addend);	\
 	} while (0)
 #define SNMP_UPD_PO_STATS_BH(mib, basefield, addend)	\
 	do { \
-		__this_cpu_inc(mib[0]->mibs[basefield##PKTS]);		\
-		__this_cpu_add(mib[0]->mibs[basefield##OCTETS], addend);	\
+		__typeof__(*mib[0]->mibs) *ptr = mib[0]->mibs;	\
+		__this_cpu_inc(ptr[basefield##PKTS]);		\
+		__this_cpu_add(ptr[basefield##OCTETS], addend);	\
 	} while (0)
 
 
-- 
cgit v1.2.3


From 14a196807482e6fc74f15fc03176d5c08880588f Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sat, 4 Aug 2012 20:33:59 +0000
Subject: net: reorganize IP MIB values

Reduce IP latencies by placing hot MIB IP fields in a single cache line.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/snmp.h | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/snmp.h b/include/linux/snmp.h
index 00bc189cb395..ad6e3a6bf9fb 100644
--- a/include/linux/snmp.h
+++ b/include/linux/snmp.h
@@ -18,7 +18,14 @@
 enum
 {
 	IPSTATS_MIB_NUM = 0,
+/* frequently written fields in fast path, kept in same cache line */
 	IPSTATS_MIB_INPKTS,			/* InReceives */
+	IPSTATS_MIB_INOCTETS,			/* InOctets */
+	IPSTATS_MIB_INDELIVERS,			/* InDelivers */
+	IPSTATS_MIB_OUTFORWDATAGRAMS,		/* OutForwDatagrams */
+	IPSTATS_MIB_OUTPKTS,			/* OutRequests */
+	IPSTATS_MIB_OUTOCTETS,			/* OutOctets */
+/* other fields */
 	IPSTATS_MIB_INHDRERRORS,		/* InHdrErrors */
 	IPSTATS_MIB_INTOOBIGERRORS,		/* InTooBigErrors */
 	IPSTATS_MIB_INNOROUTES,			/* InNoRoutes */
@@ -26,9 +33,6 @@ enum
 	IPSTATS_MIB_INUNKNOWNPROTOS,		/* InUnknownProtos */
 	IPSTATS_MIB_INTRUNCATEDPKTS,		/* InTruncatedPkts */
 	IPSTATS_MIB_INDISCARDS,			/* InDiscards */
-	IPSTATS_MIB_INDELIVERS,			/* InDelivers */
-	IPSTATS_MIB_OUTFORWDATAGRAMS,		/* OutForwDatagrams */
-	IPSTATS_MIB_OUTPKTS,			/* OutRequests */
 	IPSTATS_MIB_OUTDISCARDS,		/* OutDiscards */
 	IPSTATS_MIB_OUTNOROUTES,		/* OutNoRoutes */
 	IPSTATS_MIB_REASMTIMEOUT,		/* ReasmTimeout */
@@ -42,8 +46,6 @@ enum
 	IPSTATS_MIB_OUTMCASTPKTS,		/* OutMcastPkts */
 	IPSTATS_MIB_INBCASTPKTS,		/* InBcastPkts */
 	IPSTATS_MIB_OUTBCASTPKTS,		/* OutBcastPkts */
-	IPSTATS_MIB_INOCTETS,			/* InOctets */
-	IPSTATS_MIB_OUTOCTETS,			/* OutOctets */
 	IPSTATS_MIB_INMCASTOCTETS,		/* InMcastOctets */
 	IPSTATS_MIB_OUTMCASTOCTETS,		/* OutMcastOctets */
 	IPSTATS_MIB_INBCASTOCTETS,		/* InBcastOctets */
-- 
cgit v1.2.3


From 425f09ab7d1c9da6ca4137dd639cb6fe3f8a88f3 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 7 Aug 2012 02:19:56 +0000
Subject: net: output path optimizations

1) Avoid dirtying neighbour's confirmed field.

  TCP workloads hits this cache line for each incoming ACK.
  Lets write n->confirmed only if there is a jiffie change.

2) Optimize neigh_hh_output() for the common Ethernet case, were
   hh_len is less than 16 bytes. Replace the memcpy() call
   by two inlined 64bit load/stores on x86_64.

Bench results using udpflood test, with -C option (MSG_CONFIRM flag
added to sendto(), to reproduce the n->confirmed dirtying on UDP)

24 threads doing 1.000.000 UDP sendto() on dummy device, 4 runs.

before : 2.247s, 2.235s, 2.247s, 2.318s
after  : 1.884s, 1.905s, 1.891s, 1.895s

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dst.h       | 10 +++++++---
 include/net/neighbour.h | 14 +++++++++-----
 2 files changed, 16 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/net/dst.h b/include/net/dst.h
index baf597890064..77f52f7dc823 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -396,11 +396,15 @@ static inline void dst_confirm(struct dst_entry *dst)
 static inline int dst_neigh_output(struct dst_entry *dst, struct neighbour *n,
 				   struct sk_buff *skb)
 {
-	struct hh_cache *hh;
+	const struct hh_cache *hh;
+
+	if (dst->pending_confirm) {
+		unsigned long now = jiffies;
 
-	if (unlikely(dst->pending_confirm)) {
-		n->confirmed = jiffies;
 		dst->pending_confirm = 0;
+		/* avoid dirtying neighbour */
+		if (n->confirmed != now)
+			n->confirmed = now;
 	}
 
 	hh = &n->hh;
diff --git a/include/net/neighbour.h b/include/net/neighbour.h
index 344d8988842a..0dab173e27da 100644
--- a/include/net/neighbour.h
+++ b/include/net/neighbour.h
@@ -334,18 +334,22 @@ static inline int neigh_hh_bridge(struct hh_cache *hh, struct sk_buff *skb)
 }
 #endif
 
-static inline int neigh_hh_output(struct hh_cache *hh, struct sk_buff *skb)
+static inline int neigh_hh_output(const struct hh_cache *hh, struct sk_buff *skb)
 {
 	unsigned int seq;
 	int hh_len;
 
 	do {
-		int hh_alen;
-
 		seq = read_seqbegin(&hh->hh_lock);
 		hh_len = hh->hh_len;
-		hh_alen = HH_DATA_ALIGN(hh_len);
-		memcpy(skb->data - hh_alen, hh->hh_data, hh_alen);
+		if (likely(hh_len <= HH_DATA_MOD)) {
+			/* this is inlined by gcc */
+			memcpy(skb->data - HH_DATA_MOD, hh->hh_data, HH_DATA_MOD);
+		} else {
+			int hh_alen = HH_DATA_ALIGN(hh_len);
+
+			memcpy(skb->data - hh_alen, hh->hh_data, hh_alen);
+		}
 	} while (read_seqretry(&hh->hh_lock, seq));
 
 	skb_push(skb, hh_len);
-- 
cgit v1.2.3


From a399a8053164ec8bcb06fed52be9941a26ecde11 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 8 Aug 2012 21:13:53 +0000
Subject: time: jiffies_delta_to_clock_t() helper to the rescue
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Various /proc/net files sometimes report crazy timer values, expressed
in clock_t units.

This happens when an expired timer delta (expires - jiffies) is passed
to jiffies_to_clock_t().

This function has an overflow in :

return div_u64((u64)x * TICK_NSEC, NSEC_PER_SEC / USER_HZ);

commit cbbc719fccdb8cb (time: Change jiffies_to_clock_t() argument type
to unsigned long) only got around the problem.

As we cant output negative values in /proc/net/tcp without breaking
various tools, I suggest adding a jiffies_delta_to_clock_t() wrapper
that caps the negative delta to a 0 value.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: Maciej Żenczykowski <maze@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: hank <pyu@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/jiffies.h   |  6 ++++++
 net/bridge/br_fdb.c       |  2 +-
 net/bridge/br_stp_timer.c |  2 +-
 net/core/rtnetlink.c      |  2 +-
 net/ipv4/igmp.c           |  7 +++++--
 net/ipv4/tcp_ipv4.c       | 13 +++++--------
 net/ipv6/tcp_ipv6.c       |  9 +++------
 7 files changed, 22 insertions(+), 19 deletions(-)

(limited to 'include')

diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h
index 265e2c3cbd1c..aded9b1ac5ca 100644
--- a/include/linux/jiffies.h
+++ b/include/linux/jiffies.h
@@ -303,7 +303,13 @@ extern void jiffies_to_timespec(const unsigned long jiffies,
 extern unsigned long timeval_to_jiffies(const struct timeval *value);
 extern void jiffies_to_timeval(const unsigned long jiffies,
 			       struct timeval *value);
+
 extern clock_t jiffies_to_clock_t(unsigned long x);
+static inline clock_t jiffies_delta_to_clock_t(long delta)
+{
+	return jiffies_to_clock_t(max(0L, delta));
+}
+
 extern unsigned long clock_t_to_jiffies(unsigned long x);
 extern u64 jiffies_64_to_clock_t(u64 x);
 extern u64 nsec_to_clock_t(u64 x);
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index d21f32383517..9ce430b4657c 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -312,7 +312,7 @@ int br_fdb_fillbuf(struct net_bridge *br, void *buf,
 
 			fe->is_local = f->is_local;
 			if (!f->is_static)
-				fe->ageing_timer_value = jiffies_to_clock_t(jiffies - f->updated);
+				fe->ageing_timer_value = jiffies_delta_to_clock_t(jiffies - f->updated);
 			++fe;
 			++num;
 		}
diff --git a/net/bridge/br_stp_timer.c b/net/bridge/br_stp_timer.c
index a6747e673426..c3530a81a33b 100644
--- a/net/bridge/br_stp_timer.c
+++ b/net/bridge/br_stp_timer.c
@@ -170,5 +170,5 @@ void br_stp_port_timer_init(struct net_bridge_port *p)
 unsigned long br_timer_value(const struct timer_list *timer)
 {
 	return timer_pending(timer)
-		? jiffies_to_clock_t(timer->expires - jiffies) : 0;
+		? jiffies_delta_to_clock_t(timer->expires - jiffies) : 0;
 }
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 2c5a0a06c4ce..db037c9a4c48 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -618,7 +618,7 @@ int rtnl_put_cacheinfo(struct sk_buff *skb, struct dst_entry *dst, u32 id,
 		       long expires, u32 error)
 {
 	struct rta_cacheinfo ci = {
-		.rta_lastuse = jiffies_to_clock_t(jiffies - dst->lastuse),
+		.rta_lastuse = jiffies_delta_to_clock_t(jiffies - dst->lastuse),
 		.rta_used = dst->__use,
 		.rta_clntref = atomic_read(&(dst->__refcnt)),
 		.rta_error = error,
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 6699f23e6f55..0b5580c69f2d 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -2435,6 +2435,8 @@ static int igmp_mc_seq_show(struct seq_file *seq, void *v)
 		struct ip_mc_list *im = (struct ip_mc_list *)v;
 		struct igmp_mc_iter_state *state = igmp_mc_seq_private(seq);
 		char   *querier;
+		long delta;
+
 #ifdef CONFIG_IP_MULTICAST
 		querier = IGMP_V1_SEEN(state->in_dev) ? "V1" :
 			  IGMP_V2_SEEN(state->in_dev) ? "V2" :
@@ -2448,11 +2450,12 @@ static int igmp_mc_seq_show(struct seq_file *seq, void *v)
 				   state->dev->ifindex, state->dev->name, state->in_dev->mc_count, querier);
 		}
 
+		delta = im->timer.expires - jiffies;
 		seq_printf(seq,
 			   "\t\t\t\t%08X %5d %d:%08lX\t\t%d\n",
 			   im->multiaddr, im->users,
-			   im->tm_running, im->tm_running ?
-			   jiffies_to_clock_t(im->timer.expires-jiffies) : 0,
+			   im->tm_running,
+			   im->tm_running ? jiffies_delta_to_clock_t(delta) : 0,
 			   im->reporter);
 	}
 	return 0;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 42b2a6a73092..c660d2c19a2b 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2385,7 +2385,7 @@ static void get_openreq4(const struct sock *sk, const struct request_sock *req,
 			 struct seq_file *f, int i, int uid, int *len)
 {
 	const struct inet_request_sock *ireq = inet_rsk(req);
-	int ttd = req->expires - jiffies;
+	long delta = req->expires - jiffies;
 
 	seq_printf(f, "%4d: %08X:%04X %08X:%04X"
 		" %02X %08X:%08X %02X:%08lX %08X %5d %8d %u %d %pK%n",
@@ -2397,7 +2397,7 @@ static void get_openreq4(const struct sock *sk, const struct request_sock *req,
 		TCP_SYN_RECV,
 		0, 0, /* could print option size, but that is af dependent. */
 		1,    /* timers active (only the expire timer) */
-		jiffies_to_clock_t(ttd),
+		jiffies_delta_to_clock_t(delta),
 		req->retrans,
 		uid,
 		0,  /* non standard timer */
@@ -2448,7 +2448,7 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len)
 		tp->write_seq - tp->snd_una,
 		rx_queue,
 		timer_active,
-		jiffies_to_clock_t(timer_expires - jiffies),
+		jiffies_delta_to_clock_t(timer_expires - jiffies),
 		icsk->icsk_retransmits,
 		sock_i_uid(sk),
 		icsk->icsk_probes_out,
@@ -2467,10 +2467,7 @@ static void get_timewait4_sock(const struct inet_timewait_sock *tw,
 {
 	__be32 dest, src;
 	__u16 destp, srcp;
-	int ttd = tw->tw_ttd - jiffies;
-
-	if (ttd < 0)
-		ttd = 0;
+	long delta = tw->tw_ttd - jiffies;
 
 	dest  = tw->tw_daddr;
 	src   = tw->tw_rcv_saddr;
@@ -2480,7 +2477,7 @@ static void get_timewait4_sock(const struct inet_timewait_sock *tw,
 	seq_printf(f, "%4d: %08X:%04X %08X:%04X"
 		" %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK%n",
 		i, src, srcp, dest, destp, tw->tw_substate, 0, 0,
-		3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
+		3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
 		atomic_read(&tw->tw_refcnt), tw, len);
 }
 
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index c66b90f71c9b..aa41b0e6b163 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1875,7 +1875,7 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
 		   tp->write_seq-tp->snd_una,
 		   (sp->sk_state == TCP_LISTEN) ? sp->sk_ack_backlog : (tp->rcv_nxt - tp->copied_seq),
 		   timer_active,
-		   jiffies_to_clock_t(timer_expires - jiffies),
+		   jiffies_delta_to_clock_t(timer_expires - jiffies),
 		   icsk->icsk_retransmits,
 		   sock_i_uid(sp),
 		   icsk->icsk_probes_out,
@@ -1895,10 +1895,7 @@ static void get_timewait6_sock(struct seq_file *seq,
 	const struct in6_addr *dest, *src;
 	__u16 destp, srcp;
 	const struct inet6_timewait_sock *tw6 = inet6_twsk((struct sock *)tw);
-	int ttd = tw->tw_ttd - jiffies;
-
-	if (ttd < 0)
-		ttd = 0;
+	long delta = tw->tw_ttd - jiffies;
 
 	dest = &tw6->tw_v6_daddr;
 	src  = &tw6->tw_v6_rcv_saddr;
@@ -1914,7 +1911,7 @@ static void get_timewait6_sock(struct seq_file *seq,
 		   dest->s6_addr32[0], dest->s6_addr32[1],
 		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
 		   tw->tw_substate, 0, 0,
-		   3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
+		   3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
 		   atomic_read(&tw->tw_refcnt), tw);
 }
 
-- 
cgit v1.2.3


From b14f243a42c7aa43de71f878641acd003f223022 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@parallels.com>
Date: Wed, 8 Aug 2012 21:52:28 +0000
Subject: net: Dont use ifindices in hash fns

Eric noticed, that when there will be devices with equal indices, some
hash functions that use them will become less effective as they could.
Fix this in advance by mixing the net_device address into the hash value
instead of the device index.

This is true for arp and ndisc hash fns. The netlabel, can and llc ones
are also ifindex-based, but that three are init_net-only, thus will not
be affected.

Many thanks to David and Eric for the hash32_ptr implementation!

Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/hash.h | 10 ++++++++++
 include/net/arp.h    |  3 ++-
 include/net/ndisc.h  |  3 ++-
 3 files changed, 14 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/hash.h b/include/linux/hash.h
index b80506bdd733..24df9e70406f 100644
--- a/include/linux/hash.h
+++ b/include/linux/hash.h
@@ -67,4 +67,14 @@ static inline unsigned long hash_ptr(const void *ptr, unsigned int bits)
 {
 	return hash_long((unsigned long)ptr, bits);
 }
+
+static inline u32 hash32_ptr(const void *ptr)
+{
+	unsigned long val = (unsigned long)ptr;
+
+#if BITS_PER_LONG == 64
+	val ^= (val >> 32);
+#endif
+	return (u32)val;
+}
 #endif /* _LINUX_HASH_H */
diff --git a/include/net/arp.h b/include/net/arp.h
index 7f7df93f37cd..b630dae03411 100644
--- a/include/net/arp.h
+++ b/include/net/arp.h
@@ -3,6 +3,7 @@
 #define _ARP_H
 
 #include <linux/if_arp.h>
+#include <linux/hash.h>
 #include <net/neighbour.h>
 
 
@@ -10,7 +11,7 @@ extern struct neigh_table arp_tbl;
 
 static inline u32 arp_hashfn(u32 key, const struct net_device *dev, u32 hash_rnd)
 {
-	u32 val = key ^ dev->ifindex;
+	u32 val = key ^ hash32_ptr(dev);
 
 	return val * hash_rnd;
 }
diff --git a/include/net/ndisc.h b/include/net/ndisc.h
index 96a3b5c03e37..980d263765cf 100644
--- a/include/net/ndisc.h
+++ b/include/net/ndisc.h
@@ -49,6 +49,7 @@ enum {
 #include <linux/types.h>
 #include <linux/if_arp.h>
 #include <linux/netdevice.h>
+#include <linux/hash.h>
 
 #include <net/neighbour.h>
 
@@ -134,7 +135,7 @@ static inline u32 ndisc_hashfn(const void *pkey, const struct net_device *dev, _
 {
 	const u32 *p32 = pkey;
 
-	return (((p32[0] ^ dev->ifindex) * hash_rnd[0]) +
+	return (((p32[0] ^ hash32_ptr(dev)) * hash_rnd[0]) +
 		(p32[1] * hash_rnd[1]) +
 		(p32[2] * hash_rnd[2]) +
 		(p32[3] * hash_rnd[3]));
-- 
cgit v1.2.3


From aa79e66eee5d525e2fcbd2a5fcb87ae3dd4aa9e9 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@parallels.com>
Date: Wed, 8 Aug 2012 21:53:19 +0000
Subject: net: Make ifindex generation per-net namespace

Strictly speaking this is only _really_ required for checkpoint-restore to
make loopback device always have the same index.

This change appears to be safe wrt "ifindex should be unique per-system"
concept, as all the ifindex usage is either already made per net namespace
of is explicitly limited with init_net only.

There are two cool side effects of this. The first one -- ifindices of
devices in container are always small, regardless of how many containers
we've started (and re-started) so far. The second one is -- we can speed
up the loopback ifidex access as shown in the next patch.

v2: Place ifindex right after dev_base_seq : avoid two holes and use the
    same cache line, dirtied in list_netdevice()/unlist_netdevice()

Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/net_namespace.h | 1 +
 net/core/dev.c              | 4 ++--
 2 files changed, 3 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index ae1cd6c9ba52..6dc3db3466bf 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -66,6 +66,7 @@ struct net {
 	struct hlist_head 	*dev_name_head;
 	struct hlist_head	*dev_index_head;
 	unsigned int		dev_base_seq;	/* protected by rtnl_mutex */
+	int			ifindex;
 
 	/* core fib_rules */
 	struct list_head	rules_ops;
diff --git a/net/core/dev.c b/net/core/dev.c
index 3ca300d85271..1f06df8d10a3 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5221,12 +5221,12 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
  */
 static int dev_new_index(struct net *net)
 {
-	static int ifindex;
+	int ifindex = net->ifindex;
 	for (;;) {
 		if (++ifindex <= 0)
 			ifindex = 1;
 		if (!__dev_get_by_index(net, ifindex))
-			return ifindex;
+			return net->ifindex = ifindex;
 	}
 }
 
-- 
cgit v1.2.3


From 1fb9489bf190ce2b3fc03891f3de4b2d30600e28 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@parallels.com>
Date: Wed, 8 Aug 2012 21:53:36 +0000
Subject: net: Loopback ifindex is constant now

As pointed out, there are places, that access net->loopback_dev->ifindex
and after ifindex generation is made per-net this value becomes constant
equals 1. So go ahead and introduce the LOOPBACK_IFINDEX constant and use
it where appropriate.

Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/loopback.c            | 1 +
 include/net/net_namespace.h       | 7 +++++++
 net/decnet/dn_route.c             | 6 +++---
 net/ipv4/fib_frontend.c           | 2 +-
 net/ipv4/ipmr.c                   | 2 +-
 net/ipv4/netfilter/ipt_rpfilter.c | 2 +-
 net/ipv4/route.c                  | 6 +++---
 net/ipv6/route.c                  | 2 +-
 8 files changed, 18 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c
index e2a06fd996d5..4a075babe193 100644
--- a/drivers/net/loopback.c
+++ b/drivers/net/loopback.c
@@ -197,6 +197,7 @@ static __net_init int loopback_net_init(struct net *net)
 	if (err)
 		goto out_free_netdev;
 
+	BUG_ON(dev->ifindex != LOOPBACK_IFINDEX);
 	net->loopback_dev = dev;
 	return 0;
 
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index 6dc3db3466bf..97e441945c13 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -105,6 +105,13 @@ struct net {
 	struct sock		*diag_nlsk;
 };
 
+/*
+ * ifindex generation is per-net namespace, and loopback is
+ * always the 1st device in ns (see net_dev_init), thus any
+ * loopback device should get ifindex 1
+ */
+
+#define LOOPBACK_IFINDEX	1
 
 #include <linux/seq_file_net.h>
 
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index 85a3604c87c8..c855e8d0738f 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -961,7 +961,7 @@ static int dn_route_output_slow(struct dst_entry **pprt, const struct flowidn *o
 		.saddr = oldflp->saddr,
 		.flowidn_scope = RT_SCOPE_UNIVERSE,
 		.flowidn_mark = oldflp->flowidn_mark,
-		.flowidn_iif = init_net.loopback_dev->ifindex,
+		.flowidn_iif = LOOPBACK_IFINDEX,
 		.flowidn_oif = oldflp->flowidn_oif,
 	};
 	struct dn_route *rt = NULL;
@@ -979,7 +979,7 @@ static int dn_route_output_slow(struct dst_entry **pprt, const struct flowidn *o
 		       "dn_route_output_slow: dst=%04x src=%04x mark=%d"
 		       " iif=%d oif=%d\n", le16_to_cpu(oldflp->daddr),
 		       le16_to_cpu(oldflp->saddr),
-		       oldflp->flowidn_mark, init_net.loopback_dev->ifindex,
+		       oldflp->flowidn_mark, LOOPBACK_IFINDEX,
 		       oldflp->flowidn_oif);
 
 	/* If we have an output interface, verify its a DECnet device */
@@ -1042,7 +1042,7 @@ source_ok:
 			if (!fld.daddr)
 				goto out;
 		}
-		fld.flowidn_oif = init_net.loopback_dev->ifindex;
+		fld.flowidn_oif = LOOPBACK_IFINDEX;
 		res.type = RTN_LOCAL;
 		goto make_route;
 	}
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index c43ae3fba792..7f073a38c87d 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -218,7 +218,7 @@ __be32 fib_compute_spec_dst(struct sk_buff *skb)
 	scope = RT_SCOPE_UNIVERSE;
 	if (!ipv4_is_zeronet(ip_hdr(skb)->saddr)) {
 		fl4.flowi4_oif = 0;
-		fl4.flowi4_iif = net->loopback_dev->ifindex;
+		fl4.flowi4_iif = LOOPBACK_IFINDEX;
 		fl4.daddr = ip_hdr(skb)->saddr;
 		fl4.saddr = 0;
 		fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos);
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 8eec8f4a0536..3a57570c8ee5 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -1798,7 +1798,7 @@ static struct mr_table *ipmr_rt_fib_lookup(struct net *net, struct sk_buff *skb)
 		.flowi4_oif = (rt_is_output_route(rt) ?
 			       skb->dev->ifindex : 0),
 		.flowi4_iif = (rt_is_output_route(rt) ?
-			       net->loopback_dev->ifindex :
+			       LOOPBACK_IFINDEX :
 			       skb->dev->ifindex),
 		.flowi4_mark = skb->mark,
 	};
diff --git a/net/ipv4/netfilter/ipt_rpfilter.c b/net/ipv4/netfilter/ipt_rpfilter.c
index 31371be8174b..c30130062cd6 100644
--- a/net/ipv4/netfilter/ipt_rpfilter.c
+++ b/net/ipv4/netfilter/ipt_rpfilter.c
@@ -85,7 +85,7 @@ static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par)
 			return ipv4_is_local_multicast(iph->daddr) ^ invert;
 		flow.flowi4_iif = 0;
 	} else {
-		flow.flowi4_iif = dev_net(par->in)->loopback_dev->ifindex;
+		flow.flowi4_iif = LOOPBACK_IFINDEX;
 	}
 
 	flow.daddr = iph->saddr;
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 21ad369014c0..c58137391a3d 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1619,7 +1619,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 
 	if (res.type == RTN_LOCAL) {
 		err = fib_validate_source(skb, saddr, daddr, tos,
-					  net->loopback_dev->ifindex,
+					  LOOPBACK_IFINDEX,
 					  dev, in_dev, &itag);
 		if (err < 0)
 			goto martian_source_keep_err;
@@ -1895,7 +1895,7 @@ struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4)
 
 	orig_oif = fl4->flowi4_oif;
 
-	fl4->flowi4_iif = net->loopback_dev->ifindex;
+	fl4->flowi4_iif = LOOPBACK_IFINDEX;
 	fl4->flowi4_tos = tos & IPTOS_RT_MASK;
 	fl4->flowi4_scope = ((tos & RTO_ONLINK) ?
 			 RT_SCOPE_LINK : RT_SCOPE_UNIVERSE);
@@ -1984,7 +1984,7 @@ struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4)
 		if (!fl4->daddr)
 			fl4->daddr = fl4->saddr = htonl(INADDR_LOOPBACK);
 		dev_out = net->loopback_dev;
-		fl4->flowi4_oif = net->loopback_dev->ifindex;
+		fl4->flowi4_oif = LOOPBACK_IFINDEX;
 		res.type = RTN_LOCAL;
 		flags |= RTCF_LOCAL;
 		goto make_route;
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 8e80fd279100..0ddf2d132e7f 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -965,7 +965,7 @@ struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
 {
 	int flags = 0;
 
-	fl6->flowi6_iif = net->loopback_dev->ifindex;
+	fl6->flowi6_iif = LOOPBACK_IFINDEX;
 
 	if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
 		flags |= RT6_LOOKUP_F_IFACE;
-- 
cgit v1.2.3


From be97fdb5fbcc828240c51769cd28cba609158703 Mon Sep 17 00:00:00 2001
From: Julian Anastasov <ja@ssi.bg>
Date: Thu, 12 Jul 2012 23:06:20 +0300
Subject: ipvs: generalize app registration in netns

	Get rid of the ftp_app pointer and allow applications
to be registered without adding fields in the netns_ipvs structure.

v2: fix coding style as suggested by Pablo Neira Ayuso <pablo@netfilter.org>

Signed-off-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
 include/net/ip_vs.h            |  5 ++--
 net/netfilter/ipvs/ip_vs_app.c | 58 ++++++++++++++++++++++++++++++------------
 net/netfilter/ipvs/ip_vs_ftp.c | 21 ++++-----------
 3 files changed, 49 insertions(+), 35 deletions(-)

(limited to 'include')

diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index 95374d1696a1..4b8f18ff4789 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -808,8 +808,6 @@ struct netns_ipvs {
 	struct list_head	rs_table[IP_VS_RTAB_SIZE];
 	/* ip_vs_app */
 	struct list_head	app_list;
-	/* ip_vs_ftp */
-	struct ip_vs_app	*ftp_app;
 	/* ip_vs_proto */
 	#define IP_VS_PROTO_TAB_SIZE	32	/* must be power of 2 */
 	struct ip_vs_proto_data *proto_data_table[IP_VS_PROTO_TAB_SIZE];
@@ -1179,7 +1177,8 @@ extern void ip_vs_service_net_cleanup(struct net *net);
  *      (from ip_vs_app.c)
  */
 #define IP_VS_APP_MAX_PORTS  8
-extern int register_ip_vs_app(struct net *net, struct ip_vs_app *app);
+extern struct ip_vs_app *register_ip_vs_app(struct net *net,
+					    struct ip_vs_app *app);
 extern void unregister_ip_vs_app(struct net *net, struct ip_vs_app *app);
 extern int ip_vs_bind_app(struct ip_vs_conn *cp, struct ip_vs_protocol *pp);
 extern void ip_vs_unbind_app(struct ip_vs_conn *cp);
diff --git a/net/netfilter/ipvs/ip_vs_app.c b/net/netfilter/ipvs/ip_vs_app.c
index 64f9e8f13207..9713e6e86d47 100644
--- a/net/netfilter/ipvs/ip_vs_app.c
+++ b/net/netfilter/ipvs/ip_vs_app.c
@@ -180,22 +180,38 @@ register_ip_vs_app_inc(struct net *net, struct ip_vs_app *app, __u16 proto,
 }
 
 
-/*
- *	ip_vs_app registration routine
- */
-int register_ip_vs_app(struct net *net, struct ip_vs_app *app)
+/* Register application for netns */
+struct ip_vs_app *register_ip_vs_app(struct net *net, struct ip_vs_app *app)
 {
 	struct netns_ipvs *ipvs = net_ipvs(net);
-	/* increase the module use count */
-	ip_vs_use_count_inc();
+	struct ip_vs_app *a;
+	int err = 0;
+
+	if (!ipvs)
+		return ERR_PTR(-ENOENT);
 
 	mutex_lock(&__ip_vs_app_mutex);
 
-	list_add(&app->a_list, &ipvs->app_list);
+	list_for_each_entry(a, &ipvs->app_list, a_list) {
+		if (!strcmp(app->name, a->name)) {
+			err = -EEXIST;
+			goto out_unlock;
+		}
+	}
+	a = kmemdup(app, sizeof(*app), GFP_KERNEL);
+	if (!a) {
+		err = -ENOMEM;
+		goto out_unlock;
+	}
+	INIT_LIST_HEAD(&a->incs_list);
+	list_add(&a->a_list, &ipvs->app_list);
+	/* increase the module use count */
+	ip_vs_use_count_inc();
 
+out_unlock:
 	mutex_unlock(&__ip_vs_app_mutex);
 
-	return 0;
+	return err ? ERR_PTR(err) : a;
 }
 
 
@@ -205,20 +221,29 @@ int register_ip_vs_app(struct net *net, struct ip_vs_app *app)
  */
 void unregister_ip_vs_app(struct net *net, struct ip_vs_app *app)
 {
-	struct ip_vs_app *inc, *nxt;
+	struct netns_ipvs *ipvs = net_ipvs(net);
+	struct ip_vs_app *a, *anxt, *inc, *nxt;
+
+	if (!ipvs)
+		return;
 
 	mutex_lock(&__ip_vs_app_mutex);
 
-	list_for_each_entry_safe(inc, nxt, &app->incs_list, a_list) {
-		ip_vs_app_inc_release(net, inc);
-	}
+	list_for_each_entry_safe(a, anxt, &ipvs->app_list, a_list) {
+		if (app && strcmp(app->name, a->name))
+			continue;
+		list_for_each_entry_safe(inc, nxt, &a->incs_list, a_list) {
+			ip_vs_app_inc_release(net, inc);
+		}
 
-	list_del(&app->a_list);
+		list_del(&a->a_list);
+		kfree(a);
 
-	mutex_unlock(&__ip_vs_app_mutex);
+		/* decrease the module use count */
+		ip_vs_use_count_dec();
+	}
 
-	/* decrease the module use count */
-	ip_vs_use_count_dec();
+	mutex_unlock(&__ip_vs_app_mutex);
 }
 
 
@@ -586,5 +611,6 @@ int __net_init ip_vs_app_net_init(struct net *net)
 
 void __net_exit ip_vs_app_net_cleanup(struct net *net)
 {
+	unregister_ip_vs_app(net, NULL /* all */);
 	proc_net_remove(net, "ip_vs_app");
 }
diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c
index b20b29c903ef..ad70b7e4ac4a 100644
--- a/net/netfilter/ipvs/ip_vs_ftp.c
+++ b/net/netfilter/ipvs/ip_vs_ftp.c
@@ -441,16 +441,10 @@ static int __net_init __ip_vs_ftp_init(struct net *net)
 
 	if (!ipvs)
 		return -ENOENT;
-	app = kmemdup(&ip_vs_ftp, sizeof(struct ip_vs_app), GFP_KERNEL);
-	if (!app)
-		return -ENOMEM;
-	INIT_LIST_HEAD(&app->a_list);
-	INIT_LIST_HEAD(&app->incs_list);
-	ipvs->ftp_app = app;
 
-	ret = register_ip_vs_app(net, app);
-	if (ret)
-		goto err_exit;
+	app = register_ip_vs_app(net, &ip_vs_ftp);
+	if (IS_ERR(app))
+		return PTR_ERR(app);
 
 	for (i = 0; i < ports_count; i++) {
 		if (!ports[i])
@@ -464,9 +458,7 @@ static int __net_init __ip_vs_ftp_init(struct net *net)
 	return 0;
 
 err_unreg:
-	unregister_ip_vs_app(net, app);
-err_exit:
-	kfree(ipvs->ftp_app);
+	unregister_ip_vs_app(net, &ip_vs_ftp);
 	return ret;
 }
 /*
@@ -474,10 +466,7 @@ err_exit:
  */
 static void __ip_vs_ftp_exit(struct net *net)
 {
-	struct netns_ipvs *ipvs = net_ipvs(net);
-
-	unregister_ip_vs_app(net, ipvs->ftp_app);
-	kfree(ipvs->ftp_app);
+	unregister_ip_vs_app(net, &ip_vs_ftp);
 }
 
 static struct pernet_operations ip_vs_ftp_ops = {
-- 
cgit v1.2.3


From 3654e61137db891f5312e6dd813b961484b5fdf3 Mon Sep 17 00:00:00 2001
From: Julian Anastasov <ja@ssi.bg>
Date: Fri, 20 Jul 2012 11:59:53 +0300
Subject: ipvs: add pmtu_disc option to disable IP DF for TUN packets

	Disabling PMTU discovery can increase the output packet
rate but some users have enough resources and prefer to fragment
than to drop traffic. By default, we copy the DF bit but if
pmtu_disc is disabled we do not send FRAG_NEEDED messages anymore.

Signed-off-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
 include/net/ip_vs.h             | 11 +++++++++++
 net/netfilter/ipvs/ip_vs_ctl.c  |  8 ++++++++
 net/netfilter/ipvs/ip_vs_xmit.c |  6 +++---
 3 files changed, 22 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index 4b8f18ff4789..ee75ccdf5188 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -888,6 +888,7 @@ struct netns_ipvs {
 	unsigned int		sysctl_sync_refresh_period;
 	int			sysctl_sync_retries;
 	int			sysctl_nat_icmp_send;
+	int			sysctl_pmtu_disc;
 
 	/* ip_vs_lblc */
 	int			sysctl_lblc_expiration;
@@ -974,6 +975,11 @@ static inline int sysctl_sync_sock_size(struct netns_ipvs *ipvs)
 	return ipvs->sysctl_sync_sock_size;
 }
 
+static inline int sysctl_pmtu_disc(struct netns_ipvs *ipvs)
+{
+	return ipvs->sysctl_pmtu_disc;
+}
+
 #else
 
 static inline int sysctl_sync_threshold(struct netns_ipvs *ipvs)
@@ -1016,6 +1022,11 @@ static inline int sysctl_sync_sock_size(struct netns_ipvs *ipvs)
 	return 0;
 }
 
+static inline int sysctl_pmtu_disc(struct netns_ipvs *ipvs)
+{
+	return 1;
+}
+
 #endif
 
 /*
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index d6d5ccabe067..03d3fc6d9d64 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -1801,6 +1801,12 @@ static struct ctl_table vs_vars[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec,
 	},
+	{
+		.procname	= "pmtu_disc",
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
 #ifdef CONFIG_IP_VS_DEBUG
 	{
 		.procname	= "debug_level",
@@ -3726,6 +3732,8 @@ static int __net_init ip_vs_control_net_init_sysctl(struct net *net)
 	ipvs->sysctl_sync_retries = clamp_t(int, DEFAULT_SYNC_RETRIES, 0, 3);
 	tbl[idx++].data = &ipvs->sysctl_sync_retries;
 	tbl[idx++].data = &ipvs->sysctl_nat_icmp_send;
+	ipvs->sysctl_pmtu_disc = 1;
+	tbl[idx++].data = &ipvs->sysctl_pmtu_disc;
 
 
 	ipvs->sysctl_hdr = register_net_sysctl(net, "net/ipv4/vs", tbl);
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index c2275ba048e9..543a554008af 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -795,6 +795,7 @@ int
 ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 		  struct ip_vs_protocol *pp)
 {
+	struct netns_ipvs *ipvs = net_ipvs(skb_net(skb));
 	struct rtable *rt;			/* Route to the other host */
 	__be32 saddr;				/* Source for tunnel */
 	struct net_device *tdev;		/* Device to other host */
@@ -830,10 +831,9 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
 
 	/* Copy DF, reset fragment offset and MF */
-	df = old_iph->frag_off & htons(IP_DF);
+	df = sysctl_pmtu_disc(ipvs) ? old_iph->frag_off & htons(IP_DF) : 0;
 
-	if ((old_iph->frag_off & htons(IP_DF) &&
-	    mtu < ntohs(old_iph->tot_len) && !skb_is_gso(skb))) {
+	if (df && mtu < ntohs(old_iph->tot_len) && !skb_is_gso(skb)) {
 		icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
 		IP_VS_DBG_RL("%s(): frag needed\n", __func__);
 		goto tx_error_put;
-- 
cgit v1.2.3


From 3213e1a570783ca3a41d025cede4a27b18bc24c9 Mon Sep 17 00:00:00 2001
From: Rafał Miłecki <zajec5@gmail.com>
Date: Wed, 8 Aug 2012 19:10:14 +0200
Subject: bcma: add (mostly) NAND defines
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Rafał Miłecki <zajec5@gmail.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/bcma/bcma_driver_chipcommon.h | 85 +++++++++++++++++++++++++++++
 include/linux/bcma/bcma_regs.h              |  2 +
 2 files changed, 87 insertions(+)

(limited to 'include')

diff --git a/include/linux/bcma/bcma_driver_chipcommon.h b/include/linux/bcma/bcma_driver_chipcommon.h
index 3c80885fa829..fcb06fb284eb 100644
--- a/include/linux/bcma/bcma_driver_chipcommon.h
+++ b/include/linux/bcma/bcma_driver_chipcommon.h
@@ -94,6 +94,7 @@
 #define  BCMA_CC_CHIPST_4706_SFLASH_TYPE	BIT(2) /* 0: 8b-p/ST-s flash, 1: 16b-p/Atmal-s flash */
 #define  BCMA_CC_CHIPST_4706_MIPS_BENDIAN	BIT(3) /* 0: little, 1: big endian */
 #define  BCMA_CC_CHIPST_4706_PCIE1_DISABLE	BIT(5) /* PCIE1 enable strap pin */
+#define  BCMA_CC_CHIPST_5357_NAND_BOOT		BIT(4) /* NAND boot, valid for CC rev 38 and/or BCM5357 */
 #define BCMA_CC_JCMD			0x0030		/* Rev >= 10 only */
 #define  BCMA_CC_JCMD_START		0x80000000
 #define  BCMA_CC_JCMD_BUSY		0x80000000
@@ -260,6 +261,29 @@
 #define  BCMA_CC_SROM_CONTROL_SIZE_16K	0x00000004
 #define  BCMA_CC_SROM_CONTROL_SIZE_SHIFT	1
 #define  BCMA_CC_SROM_CONTROL_PRESENT	0x00000001
+/* Block 0x140 - 0x190 registers are chipset specific */
+#define BCMA_CC_4706_FLASHSCFG		0x18C		/* Flash struct configuration */
+#define  BCMA_CC_4706_FLASHSCFG_MASK	0x000000ff
+#define  BCMA_CC_4706_FLASHSCFG_SF1	0x00000001	/* 2nd serial flash present */
+#define  BCMA_CC_4706_FLASHSCFG_PF1	0x00000002	/* 2nd parallel flash present */
+#define  BCMA_CC_4706_FLASHSCFG_SF1_TYPE	0x00000004	/* 2nd serial flash type : 0 : ST, 1 : Atmel */
+#define  BCMA_CC_4706_FLASHSCFG_NF1	0x00000008	/* 2nd NAND flash present */
+#define  BCMA_CC_4706_FLASHSCFG_1ST_MADDR_SEG_MASK	0x000000f0
+#define  BCMA_CC_4706_FLASHSCFG_1ST_MADDR_SEG_4MB	0x00000010	/* 4MB */
+#define  BCMA_CC_4706_FLASHSCFG_1ST_MADDR_SEG_8MB	0x00000020	/* 8MB */
+#define  BCMA_CC_4706_FLASHSCFG_1ST_MADDR_SEG_16MB	0x00000030	/* 16MB */
+#define  BCMA_CC_4706_FLASHSCFG_1ST_MADDR_SEG_32MB	0x00000040	/* 32MB */
+#define  BCMA_CC_4706_FLASHSCFG_1ST_MADDR_SEG_64MB	0x00000050	/* 64MB */
+#define  BCMA_CC_4706_FLASHSCFG_1ST_MADDR_SEG_128MB	0x00000060	/* 128MB */
+#define  BCMA_CC_4706_FLASHSCFG_1ST_MADDR_SEG_256MB	0x00000070	/* 256MB */
+/* NAND flash registers for BCM4706 (corerev = 31) */
+#define BCMA_CC_NFLASH_CTL		0x01A0
+#define  BCMA_CC_NFLASH_CTL_ERR		0x08000000
+#define BCMA_CC_NFLASH_CONF		0x01A4
+#define BCMA_CC_NFLASH_COL_ADDR		0x01A8
+#define BCMA_CC_NFLASH_ROW_ADDR		0x01AC
+#define BCMA_CC_NFLASH_DATA		0x01B0
+#define BCMA_CC_NFLASH_WAITCNT0		0x01B4
 /* 0x1E0 is defined as shared BCMA_CLKCTLST */
 #define BCMA_CC_HW_WORKAROUND		0x01E4 /* Hardware workaround (rev >= 20) */
 #define BCMA_CC_UART0_DATA		0x0300
@@ -319,6 +343,60 @@
 #define BCMA_CC_PLLCTL_ADDR		0x0660
 #define BCMA_CC_PLLCTL_DATA		0x0664
 #define BCMA_CC_SPROM			0x0800 /* SPROM beginning */
+/* NAND flash MLC controller registers (corerev >= 38) */
+#define BCMA_CC_NAND_REVISION		0x0C00
+#define BCMA_CC_NAND_CMD_START		0x0C04
+#define BCMA_CC_NAND_CMD_ADDR_X		0x0C08
+#define BCMA_CC_NAND_CMD_ADDR		0x0C0C
+#define BCMA_CC_NAND_CMD_END_ADDR	0x0C10
+#define BCMA_CC_NAND_CS_NAND_SELECT	0x0C14
+#define BCMA_CC_NAND_CS_NAND_XOR	0x0C18
+#define BCMA_CC_NAND_SPARE_RD0		0x0C20
+#define BCMA_CC_NAND_SPARE_RD4		0x0C24
+#define BCMA_CC_NAND_SPARE_RD8		0x0C28
+#define BCMA_CC_NAND_SPARE_RD12		0x0C2C
+#define BCMA_CC_NAND_SPARE_WR0		0x0C30
+#define BCMA_CC_NAND_SPARE_WR4		0x0C34
+#define BCMA_CC_NAND_SPARE_WR8		0x0C38
+#define BCMA_CC_NAND_SPARE_WR12		0x0C3C
+#define BCMA_CC_NAND_ACC_CONTROL	0x0C40
+#define BCMA_CC_NAND_CONFIG		0x0C48
+#define BCMA_CC_NAND_TIMING_1		0x0C50
+#define BCMA_CC_NAND_TIMING_2		0x0C54
+#define BCMA_CC_NAND_SEMAPHORE		0x0C58
+#define BCMA_CC_NAND_DEVID		0x0C60
+#define BCMA_CC_NAND_DEVID_X		0x0C64
+#define BCMA_CC_NAND_BLOCK_LOCK_STATUS	0x0C68
+#define BCMA_CC_NAND_INTFC_STATUS	0x0C6C
+#define BCMA_CC_NAND_ECC_CORR_ADDR_X	0x0C70
+#define BCMA_CC_NAND_ECC_CORR_ADDR	0x0C74
+#define BCMA_CC_NAND_ECC_UNC_ADDR_X	0x0C78
+#define BCMA_CC_NAND_ECC_UNC_ADDR	0x0C7C
+#define BCMA_CC_NAND_READ_ERROR_COUNT	0x0C80
+#define BCMA_CC_NAND_CORR_STAT_THRESHOLD	0x0C84
+#define BCMA_CC_NAND_READ_ADDR_X	0x0C90
+#define BCMA_CC_NAND_READ_ADDR		0x0C94
+#define BCMA_CC_NAND_PAGE_PROGRAM_ADDR_X	0x0C98
+#define BCMA_CC_NAND_PAGE_PROGRAM_ADDR	0x0C9C
+#define BCMA_CC_NAND_COPY_BACK_ADDR_X	0x0CA0
+#define BCMA_CC_NAND_COPY_BACK_ADDR	0x0CA4
+#define BCMA_CC_NAND_BLOCK_ERASE_ADDR_X	0x0CA8
+#define BCMA_CC_NAND_BLOCK_ERASE_ADDR	0x0CAC
+#define BCMA_CC_NAND_INV_READ_ADDR_X	0x0CB0
+#define BCMA_CC_NAND_INV_READ_ADDR	0x0CB4
+#define BCMA_CC_NAND_BLK_WR_PROTECT	0x0CC0
+#define BCMA_CC_NAND_ACC_CONTROL_CS1	0x0CD0
+#define BCMA_CC_NAND_CONFIG_CS1		0x0CD4
+#define BCMA_CC_NAND_TIMING_1_CS1	0x0CD8
+#define BCMA_CC_NAND_TIMING_2_CS1	0x0CDC
+#define BCMA_CC_NAND_SPARE_RD16		0x0D30
+#define BCMA_CC_NAND_SPARE_RD20		0x0D34
+#define BCMA_CC_NAND_SPARE_RD24		0x0D38
+#define BCMA_CC_NAND_SPARE_RD28		0x0D3C
+#define BCMA_CC_NAND_CACHE_ADDR		0x0D40
+#define BCMA_CC_NAND_CACHE_DATA		0x0D44
+#define BCMA_CC_NAND_CTRL_CONFIG	0x0D48
+#define BCMA_CC_NAND_CTRL_STATUS	0x0D4C
 
 /* Divider allocation in 4716/47162/5356 */
 #define BCMA_CC_PMU5_MAINPLL_CPU	1
@@ -409,6 +487,13 @@
 /* 4313 Chip specific ChipControl register bits */
 #define BCMA_CCTRL_4313_12MA_LED_DRIVE		0x00000007	/* 12 mA drive strengh for later 4313 */
 
+/* BCM5357 ChipControl register bits */
+#define BCMA_CHIPCTL_5357_EXTPA			BIT(14)
+#define BCMA_CHIPCTL_5357_ANT_MUX_2O3		BIT(15)
+#define BCMA_CHIPCTL_5357_NFLASH		BIT(16)
+#define BCMA_CHIPCTL_5357_I2S_PINS_ENABLE	BIT(18)
+#define BCMA_CHIPCTL_5357_I2CSPI_PINS_ENABLE	BIT(19)
+
 /* Data for the PMU, if available.
  * Check availability with ((struct bcma_chipcommon)->capabilities & BCMA_CC_CAP_PMU)
  */
diff --git a/include/linux/bcma/bcma_regs.h b/include/linux/bcma/bcma_regs.h
index 5a71d5719640..a393e82bf7bf 100644
--- a/include/linux/bcma/bcma_regs.h
+++ b/include/linux/bcma/bcma_regs.h
@@ -11,11 +11,13 @@
 #define  BCMA_CLKCTLST_HAVEHTREQ	0x00000010 /* HT available request */
 #define  BCMA_CLKCTLST_HWCROFF		0x00000020 /* Force HW clock request off */
 #define  BCMA_CLKCTLST_EXTRESREQ	0x00000700 /* Mask of external resource requests */
+#define  BCMA_CLKCTLST_EXTRESREQ_SHIFT	8
 #define  BCMA_CLKCTLST_HAVEALP		0x00010000 /* ALP available */
 #define  BCMA_CLKCTLST_HAVEHT		0x00020000 /* HT available */
 #define  BCMA_CLKCTLST_BP_ON_ALP	0x00040000 /* RO: running on ALP clock */
 #define  BCMA_CLKCTLST_BP_ON_HT		0x00080000 /* RO: running on HT clock */
 #define  BCMA_CLKCTLST_EXTRESST		0x07000000 /* Mask of external resource status */
+#define  BCMA_CLKCTLST_EXTRESST_SHIFT	24
 /* Is there any BCM4328 on BCMA bus? */
 #define  BCMA_CLKCTLST_4328A0_HAVEHT	0x00010000 /* 4328a0 has reversed bits */
 #define  BCMA_CLKCTLST_4328A0_HAVEALP	0x00020000 /* 4328a0 has reversed bits */
-- 
cgit v1.2.3


From 902d9e0f48ddc18fb37c1b1edf5e3b27aaba1505 Mon Sep 17 00:00:00 2001
From: Rafał Miłecki <zajec5@gmail.com>
Date: Wed, 8 Aug 2012 19:37:04 +0200
Subject: ssb: check for flash presentence
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We can not assume parallel flash is always present, there are boards
with *serial* flash and probably some without flash at all.
Define some bits by the way.

Signed-off-by: Rafał Miłecki <zajec5@gmail.com>
Reviewed-by: Hauke Mehrtens <hauke@hauke-m.de>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/ssb/driver_mipscore.c             | 28 +++++++++++++++++++++-------
 include/linux/ssb/ssb_driver_chipcommon.h |  4 +++-
 2 files changed, 24 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/drivers/ssb/driver_mipscore.c b/drivers/ssb/driver_mipscore.c
index 7e2ddc042f5b..c6250867a95d 100644
--- a/drivers/ssb/driver_mipscore.c
+++ b/drivers/ssb/driver_mipscore.c
@@ -190,16 +190,30 @@ static void ssb_mips_flash_detect(struct ssb_mipscore *mcore)
 {
 	struct ssb_bus *bus = mcore->dev->bus;
 
-	mcore->flash_buswidth = 2;
-	if (bus->chipco.dev) {
-		mcore->flash_window = 0x1c000000;
-		mcore->flash_window_size = 0x02000000;
+	/* When there is no chipcommon on the bus there is 4MB flash */
+	if (!bus->chipco.dev) {
+		mcore->flash_buswidth = 2;
+		mcore->flash_window = SSB_FLASH1;
+		mcore->flash_window_size = SSB_FLASH1_SZ;
+		return;
+	}
+
+	/* There is ChipCommon, so use it to read info about flash */
+	switch (bus->chipco.capabilities & SSB_CHIPCO_CAP_FLASHT) {
+	case SSB_CHIPCO_FLASHT_STSER:
+	case SSB_CHIPCO_FLASHT_ATSER:
+		pr_err("Serial flash not supported\n");
+		break;
+	case SSB_CHIPCO_FLASHT_PARA:
+		pr_debug("Found parallel flash\n");
+		mcore->flash_window = SSB_FLASH2;
+		mcore->flash_window_size = SSB_FLASH2_SZ;
 		if ((ssb_read32(bus->chipco.dev, SSB_CHIPCO_FLASH_CFG)
 		               & SSB_CHIPCO_CFG_DS16) == 0)
 			mcore->flash_buswidth = 1;
-	} else {
-		mcore->flash_window = 0x1fc00000;
-		mcore->flash_window_size = 0x00400000;
+		else
+			mcore->flash_buswidth = 2;
+		break;
 	}
 }
 
diff --git a/include/linux/ssb/ssb_driver_chipcommon.h b/include/linux/ssb/ssb_driver_chipcommon.h
index 1a6b0045b06b..c2b02a5c86ae 100644
--- a/include/linux/ssb/ssb_driver_chipcommon.h
+++ b/include/linux/ssb/ssb_driver_chipcommon.h
@@ -504,7 +504,9 @@
 #define SSB_CHIPCO_FLASHCTL_ST_SE	0x02D8		/* Sector Erase */
 #define SSB_CHIPCO_FLASHCTL_ST_BE	0x00C7		/* Bulk Erase */
 #define SSB_CHIPCO_FLASHCTL_ST_DP	0x00B9		/* Deep Power-down */
-#define SSB_CHIPCO_FLASHCTL_ST_RSIG	0x03AB		/* Read Electronic Signature */
+#define SSB_CHIPCO_FLASHCTL_ST_RES	0x03AB		/* Read Electronic Signature */
+#define SSB_CHIPCO_FLASHCTL_ST_CSA	0x1000		/* Keep chip select asserted */
+#define SSB_CHIPCO_FLASHCTL_ST_SSE	0x0220		/* Sub-sector Erase */
 
 /* Status register bits for ST flashes */
 #define SSB_CHIPCO_FLASHSTA_ST_WIP	0x01		/* Write In Progress */
-- 
cgit v1.2.3


From ee89bab14e857678f83a71ee99e575b0fdbb58d4 Mon Sep 17 00:00:00 2001
From: Amerigo Wang <amwang@redhat.com>
Date: Thu, 9 Aug 2012 22:14:56 +0000
Subject: net: move and rename netif_notify_peers()

I believe net/core/dev.c is a better place for netif_notify_peers(),
because other net event notify functions also stay in this file.

And rename it to netdev_notify_peers().

Cc: David S. Miller <davem@davemloft.net>
Cc: Ian Campbell <Ian.Campbell@citrix.com>
Signed-off-by: Cong Wang <amwang@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/hyperv/netvsc_drv.c |  2 +-
 drivers/net/virtio_net.c        |  2 +-
 drivers/net/xen-netfront.c      |  2 +-
 include/linux/netdevice.h       |  3 +--
 net/core/dev.c                  | 18 ++++++++++++++++++
 net/sched/sch_generic.c         | 18 ------------------
 6 files changed, 22 insertions(+), 23 deletions(-)

(limited to 'include')

diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index 8c5a1c43c81d..e91111a656f7 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -400,7 +400,7 @@ static void netvsc_send_garp(struct work_struct *w)
 	ndev_ctx = container_of(w, struct net_device_context, dwork.work);
 	net_device = hv_get_drvdata(ndev_ctx->device_ctx);
 	net = net_device->ndev;
-	netif_notify_peers(net);
+	netdev_notify_peers(net);
 }
 
 
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 83d2b0c34c5e..81a64c58e8ad 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -993,7 +993,7 @@ static void virtnet_config_changed_work(struct work_struct *work)
 		goto done;
 
 	if (v & VIRTIO_NET_S_ANNOUNCE) {
-		netif_notify_peers(vi->dev);
+		netdev_notify_peers(vi->dev);
 		virtnet_ack_link_announce(vi);
 	}
 
diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
index 30899901aef5..39afd37e62b3 100644
--- a/drivers/net/xen-netfront.c
+++ b/drivers/net/xen-netfront.c
@@ -1731,7 +1731,7 @@ static void netback_changed(struct xenbus_device *dev,
 		break;
 
 	case XenbusStateConnected:
-		netif_notify_peers(netdev);
+		netdev_notify_peers(netdev);
 		break;
 
 	case XenbusStateClosing:
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index a9db4f33407f..8d4b7316c734 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2246,8 +2246,6 @@ extern void netif_carrier_on(struct net_device *dev);
 
 extern void netif_carrier_off(struct net_device *dev);
 
-extern void netif_notify_peers(struct net_device *dev);
-
 /**
  *	netif_dormant_on - mark device as dormant.
  *	@dev: network device
@@ -2596,6 +2594,7 @@ extern void		__dev_set_rx_mode(struct net_device *dev);
 extern int		dev_set_promiscuity(struct net_device *dev, int inc);
 extern int		dev_set_allmulti(struct net_device *dev, int inc);
 extern void		netdev_state_change(struct net_device *dev);
+extern void		netdev_notify_peers(struct net_device *dev);
 extern int		netdev_bonding_change(struct net_device *dev,
 					      unsigned long event);
 extern void		netdev_features_change(struct net_device *dev);
diff --git a/net/core/dev.c b/net/core/dev.c
index 1f06df8d10a3..5defcf940005 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1106,6 +1106,24 @@ void netdev_state_change(struct net_device *dev)
 }
 EXPORT_SYMBOL(netdev_state_change);
 
+/**
+ * 	netdev_notify_peers - notify network peers about existence of @dev
+ * 	@dev: network device
+ *
+ * Generate traffic such that interested network peers are aware of
+ * @dev, such as by generating a gratuitous ARP. This may be used when
+ * a device wants to inform the rest of the network about some sort of
+ * reconfiguration such as a failover event or virtual machine
+ * migration.
+ */
+void netdev_notify_peers(struct net_device *dev)
+{
+	rtnl_lock();
+	call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, dev);
+	rtnl_unlock();
+}
+EXPORT_SYMBOL(netdev_notify_peers);
+
 int netdev_bonding_change(struct net_device *dev, unsigned long event)
 {
 	return call_netdevice_notifiers(event, dev);
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 511323e89cec..6c4d5fe53ce8 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -324,24 +324,6 @@ void netif_carrier_off(struct net_device *dev)
 }
 EXPORT_SYMBOL(netif_carrier_off);
 
-/**
- * 	netif_notify_peers - notify network peers about existence of @dev
- * 	@dev: network device
- *
- * Generate traffic such that interested network peers are aware of
- * @dev, such as by generating a gratuitous ARP. This may be used when
- * a device wants to inform the rest of the network about some sort of
- * reconfiguration such as a failover event or virtual machine
- * migration.
- */
-void netif_notify_peers(struct net_device *dev)
-{
-	rtnl_lock();
-	call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, dev);
-	rtnl_unlock();
-}
-EXPORT_SYMBOL(netif_notify_peers);
-
 /* "NOOP" scheduler: the best scheduler, recommended for all interfaces
    under all circumstances. It is difficult to invent anything faster or
    cheaper.
-- 
cgit v1.2.3


From b7bc2a5b5bd99b216c3e5fe68c7f45c684ab5745 Mon Sep 17 00:00:00 2001
From: Amerigo Wang <amwang@redhat.com>
Date: Thu, 9 Aug 2012 22:14:57 +0000
Subject: net: remove netdev_bonding_change()

I don't see any benifits to use netdev_bonding_change() than
using call_netdevice_notifiers() directly.

Cc: David S. Miller <davem@davemloft.net>
Signed-off-by: Cong Wang <amwang@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_main.c | 20 ++++++++++----------
 include/linux/netdevice.h       |  2 --
 net/core/dev.c                  |  6 ------
 3 files changed, 10 insertions(+), 18 deletions(-)

(limited to 'include')

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 6fae5f3ec7f6..d95fbc34b229 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -1120,10 +1120,10 @@ void bond_change_active_slave(struct bonding *bond, struct slave *new_active)
 			write_unlock_bh(&bond->curr_slave_lock);
 			read_unlock(&bond->lock);
 
-			netdev_bonding_change(bond->dev, NETDEV_BONDING_FAILOVER);
+			call_netdevice_notifiers(NETDEV_BONDING_FAILOVER, bond->dev);
 			if (should_notify_peers)
-				netdev_bonding_change(bond->dev,
-						      NETDEV_NOTIFY_PEERS);
+				call_netdevice_notifiers(NETDEV_NOTIFY_PEERS,
+							 bond->dev);
 
 			read_lock(&bond->lock);
 			write_lock_bh(&bond->curr_slave_lock);
@@ -1560,8 +1560,8 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
 				 bond_dev->name,
 				 bond_dev->type, slave_dev->type);
 
-			res = netdev_bonding_change(bond_dev,
-						    NETDEV_PRE_TYPE_CHANGE);
+			res = call_netdevice_notifiers(NETDEV_PRE_TYPE_CHANGE,
+						       bond_dev);
 			res = notifier_to_errno(res);
 			if (res) {
 				pr_err("%s: refused to change device type\n",
@@ -1581,8 +1581,8 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
 				bond_dev->priv_flags &= ~IFF_TX_SKB_SHARING;
 			}
 
-			netdev_bonding_change(bond_dev,
-					      NETDEV_POST_TYPE_CHANGE);
+			call_netdevice_notifiers(NETDEV_POST_TYPE_CHANGE,
+						 bond_dev);
 		}
 	} else if (bond_dev->type != slave_dev->type) {
 		pr_err("%s ether type (%d) is different from other slaves (%d), can not enslave it.\n",
@@ -1943,7 +1943,7 @@ int bond_release(struct net_device *bond_dev, struct net_device *slave_dev)
 	}
 
 	block_netpoll_tx();
-	netdev_bonding_change(bond_dev, NETDEV_RELEASE);
+	call_netdevice_notifiers(NETDEV_RELEASE, bond_dev);
 	write_lock_bh(&bond->lock);
 
 	slave = bond_get_slave_by_dev(bond, slave_dev);
@@ -2586,7 +2586,7 @@ re_arm:
 			read_unlock(&bond->lock);
 			return;
 		}
-		netdev_bonding_change(bond->dev, NETDEV_NOTIFY_PEERS);
+		call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, bond->dev);
 		rtnl_unlock();
 	}
 }
@@ -3205,7 +3205,7 @@ re_arm:
 			read_unlock(&bond->lock);
 			return;
 		}
-		netdev_bonding_change(bond->dev, NETDEV_NOTIFY_PEERS);
+		call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, bond->dev);
 		rtnl_unlock();
 	}
 }
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 8d4b7316c734..1d6ab69c1f3f 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2595,8 +2595,6 @@ extern int		dev_set_promiscuity(struct net_device *dev, int inc);
 extern int		dev_set_allmulti(struct net_device *dev, int inc);
 extern void		netdev_state_change(struct net_device *dev);
 extern void		netdev_notify_peers(struct net_device *dev);
-extern int		netdev_bonding_change(struct net_device *dev,
-					      unsigned long event);
 extern void		netdev_features_change(struct net_device *dev);
 /* Load a device via the kmod */
 extern void		dev_load(struct net *net, const char *name);
diff --git a/net/core/dev.c b/net/core/dev.c
index 5defcf940005..ce1bccb08de5 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1124,12 +1124,6 @@ void netdev_notify_peers(struct net_device *dev)
 }
 EXPORT_SYMBOL(netdev_notify_peers);
 
-int netdev_bonding_change(struct net_device *dev, unsigned long event)
-{
-	return call_netdevice_notifiers(event, dev);
-}
-EXPORT_SYMBOL(netdev_bonding_change);
-
 /**
  *	dev_load 	- load a network module
  *	@net: the applicable net namespace
-- 
cgit v1.2.3


From c12b395a46646bab69089ce7016ac78177f6001f Mon Sep 17 00:00:00 2001
From: "xeb@mail.ru" <xeb@mail.ru>
Date: Fri, 10 Aug 2012 00:51:50 +0000
Subject: gre: Support GRE over IPv6

GRE over IPv6 implementation.

Signed-off-by: Dmitry Kozlov <xeb@mail.ru>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_arp.h     |    1 +
 include/linux/if_tunnel.h  |    3 +
 include/linux/ip6_tunnel.h |   17 +
 include/net/ip6_tunnel.h   |   41 +-
 include/net/ipv6.h         |    1 +
 net/ipv6/Kconfig           |   16 +
 net/ipv6/Makefile          |    1 +
 net/ipv6/ip6_gre.c         | 1790 ++++++++++++++++++++++++++++++++++++++++++++
 net/ipv6/ip6_tunnel.c      |   89 ++-
 9 files changed, 1933 insertions(+), 26 deletions(-)
 create mode 100644 net/ipv6/ip6_gre.c

(limited to 'include')

diff --git a/include/linux/if_arp.h b/include/linux/if_arp.h
index f0e69c6e8208..9adcc29f084a 100644
--- a/include/linux/if_arp.h
+++ b/include/linux/if_arp.h
@@ -92,6 +92,7 @@
 #define ARPHRD_PHONET	820		/* PhoNet media type		*/
 #define ARPHRD_PHONET_PIPE 821		/* PhoNet pipe header		*/
 #define ARPHRD_CAIF	822		/* CAIF media type		*/
+#define ARPHRD_IP6GRE	823		/* GRE over IPv6		*/
 
 #define ARPHRD_VOID	  0xFFFF	/* Void type, nothing is known */
 #define ARPHRD_NONE	  0xFFFE	/* zero header length */
diff --git a/include/linux/if_tunnel.h b/include/linux/if_tunnel.h
index 5efff60b6f56..8c5035ac3142 100644
--- a/include/linux/if_tunnel.h
+++ b/include/linux/if_tunnel.h
@@ -75,6 +75,9 @@ enum {
 	IFLA_GRE_TTL,
 	IFLA_GRE_TOS,
 	IFLA_GRE_PMTUDISC,
+	IFLA_GRE_ENCAP_LIMIT,
+	IFLA_GRE_FLOWINFO,
+	IFLA_GRE_FLAGS,
 	__IFLA_GRE_MAX,
 };
 
diff --git a/include/linux/ip6_tunnel.h b/include/linux/ip6_tunnel.h
index bf22b0317902..48af63c9a48d 100644
--- a/include/linux/ip6_tunnel.h
+++ b/include/linux/ip6_tunnel.h
@@ -31,4 +31,21 @@ struct ip6_tnl_parm {
 	struct in6_addr raddr;	/* remote tunnel end-point address */
 };
 
+struct ip6_tnl_parm2 {
+	char name[IFNAMSIZ];	/* name of tunnel device */
+	int link;		/* ifindex of underlying L2 interface */
+	__u8 proto;		/* tunnel protocol */
+	__u8 encap_limit;	/* encapsulation limit for tunnel */
+	__u8 hop_limit;		/* hop limit for tunnel */
+	__be32 flowinfo;	/* traffic class and flowlabel for tunnel */
+	__u32 flags;		/* tunnel flags */
+	struct in6_addr laddr;	/* local tunnel end-point address */
+	struct in6_addr raddr;	/* remote tunnel end-point address */
+
+	__be16			i_flags;
+	__be16			o_flags;
+	__be32			i_key;
+	__be32			o_key;
+};
+
 #endif
diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h
index 358fb86f57eb..e03047f7090b 100644
--- a/include/net/ip6_tunnel.h
+++ b/include/net/ip6_tunnel.h
@@ -5,6 +5,8 @@
 #include <linux/netdevice.h>
 #include <linux/ip6_tunnel.h>
 
+#define IP6TUNNEL_ERR_TIMEO (30*HZ)
+
 /* capable of sending packets */
 #define IP6_TNL_F_CAP_XMIT 0x10000
 /* capable of receiving packets */
@@ -12,15 +14,40 @@
 /* determine capability on a per-packet basis */
 #define IP6_TNL_F_CAP_PER_PACKET 0x40000
 
-/* IPv6 tunnel */
+struct __ip6_tnl_parm {
+	char name[IFNAMSIZ];	/* name of tunnel device */
+	int link;		/* ifindex of underlying L2 interface */
+	__u8 proto;		/* tunnel protocol */
+	__u8 encap_limit;	/* encapsulation limit for tunnel */
+	__u8 hop_limit;		/* hop limit for tunnel */
+	__be32 flowinfo;	/* traffic class and flowlabel for tunnel */
+	__u32 flags;		/* tunnel flags */
+	struct in6_addr laddr;	/* local tunnel end-point address */
+	struct in6_addr raddr;	/* remote tunnel end-point address */
+
+	__be16			i_flags;
+	__be16			o_flags;
+	__be32			i_key;
+	__be32			o_key;
+};
 
+/* IPv6 tunnel */
 struct ip6_tnl {
 	struct ip6_tnl __rcu *next;	/* next tunnel in list */
 	struct net_device *dev;	/* virtual device associated with tunnel */
-	struct ip6_tnl_parm parms;	/* tunnel configuration parameters */
+	struct __ip6_tnl_parm parms;	/* tunnel configuration parameters */
 	struct flowi fl;	/* flowi template for xmit */
 	struct dst_entry *dst_cache;    /* cached dst */
 	u32 dst_cookie;
+
+	int err_count;
+	unsigned long err_time;
+
+	/* These fields used only by GRE */
+	__u32 i_seqno;	/* The last seen seqno	*/
+	__u32 o_seqno;	/* The last output seqno */
+	int hlen;       /* Precalculated GRE header length */
+	int mlink;
 };
 
 /* Tunnel encapsulation limit destination sub-option */
@@ -31,4 +58,14 @@ struct ipv6_tlv_tnl_enc_lim {
 	__u8 encap_limit;	/* tunnel encapsulation limit   */
 } __packed;
 
+struct dst_entry *ip6_tnl_dst_check(struct ip6_tnl *t);
+void ip6_tnl_dst_reset(struct ip6_tnl *t);
+void ip6_tnl_dst_store(struct ip6_tnl *t, struct dst_entry *dst);
+int ip6_tnl_rcv_ctl(struct ip6_tnl *t, const struct in6_addr *laddr,
+		const struct in6_addr *raddr);
+int ip6_tnl_xmit_ctl(struct ip6_tnl *t);
+__u16 ip6_tnl_parse_tlv_enc_lim(struct sk_buff *skb, __u8 *raw);
+__u32 ip6_tnl_get_cap(struct ip6_tnl *t, const struct in6_addr *laddr,
+			     const struct in6_addr *raddr);
+
 #endif
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 01c34b363a34..6d01fb00ff2b 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -34,6 +34,7 @@
 #define NEXTHDR_IPV6		41	/* IPv6 in IPv6 */
 #define NEXTHDR_ROUTING		43	/* Routing header. */
 #define NEXTHDR_FRAGMENT	44	/* Fragmentation/reassembly header. */
+#define NEXTHDR_GRE		47	/* GRE header. */
 #define NEXTHDR_ESP		50	/* Encapsulating security payload. */
 #define NEXTHDR_AUTH		51	/* Authentication header. */
 #define NEXTHDR_ICMP		58	/* ICMP for IPv6. */
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index 5728695b5449..4f7fe7270e37 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -201,6 +201,22 @@ config IPV6_TUNNEL
 
 	  If unsure, say N.
 
+config IPV6_GRE
+	tristate "IPv6: GRE tunnel"
+	select IPV6_TUNNEL
+	---help---
+	  Tunneling means encapsulating data of one protocol type within
+	  another protocol and sending it over a channel that understands the
+	  encapsulating protocol. This particular tunneling driver implements
+	  GRE (Generic Routing Encapsulation) and at this time allows
+	  encapsulating of IPv4 or IPv6 over existing IPv6 infrastructure.
+	  This driver is useful if the other endpoint is a Cisco router: Cisco
+	  likes GRE much better than the other Linux tunneling driver ("IP
+	  tunneling" above). In addition, GRE allows multicast redistribution
+	  through the tunnel.
+
+	  Saying M here will produce a module called ip6_gre. If unsure, say N.
+
 config IPV6_MULTIPLE_TABLES
 	bool "IPv6: Multiple Routing Tables"
 	depends on EXPERIMENTAL
diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile
index 686934acfac1..b6d3f79151e2 100644
--- a/net/ipv6/Makefile
+++ b/net/ipv6/Makefile
@@ -36,6 +36,7 @@ obj-$(CONFIG_NETFILTER)	+= netfilter/
 
 obj-$(CONFIG_IPV6_SIT) += sit.o
 obj-$(CONFIG_IPV6_TUNNEL) += ip6_tunnel.o
+obj-$(CONFIG_IPV6_GRE) += ip6_gre.o
 
 obj-y += addrconf_core.o exthdrs_core.o
 
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
new file mode 100644
index 000000000000..a84ad5dc4fcf
--- /dev/null
+++ b/net/ipv6/ip6_gre.c
@@ -0,0 +1,1790 @@
+/*
+ *	GRE over IPv6 protocol decoder.
+ *
+ *	Authors: Dmitry Kozlov (xeb@mail.ru)
+ *
+ *	This program is free software; you can redistribute it and/or
+ *	modify it under the terms of the GNU General Public License
+ *	as published by the Free Software Foundation; either version
+ *	2 of the License, or (at your option) any later version.
+ *
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/capability.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include <linux/in.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include <linux/if_arp.h>
+#include <linux/mroute.h>
+#include <linux/init.h>
+#include <linux/in6.h>
+#include <linux/inetdevice.h>
+#include <linux/igmp.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/etherdevice.h>
+#include <linux/if_ether.h>
+#include <linux/hash.h>
+#include <linux/if_tunnel.h>
+#include <linux/ip6_tunnel.h>
+
+#include <net/sock.h>
+#include <net/ip.h>
+#include <net/icmp.h>
+#include <net/protocol.h>
+#include <net/addrconf.h>
+#include <net/arp.h>
+#include <net/checksum.h>
+#include <net/dsfield.h>
+#include <net/inet_ecn.h>
+#include <net/xfrm.h>
+#include <net/net_namespace.h>
+#include <net/netns/generic.h>
+#include <net/rtnetlink.h>
+
+#include <net/ipv6.h>
+#include <net/ip6_fib.h>
+#include <net/ip6_route.h>
+#include <net/ip6_tunnel.h>
+
+
+#define IPV6_TCLASS_MASK (IPV6_FLOWINFO_MASK & ~IPV6_FLOWLABEL_MASK)
+#define IPV6_TCLASS_SHIFT 20
+
+#define HASH_SIZE_SHIFT  5
+#define HASH_SIZE (1 << HASH_SIZE_SHIFT)
+
+static int ip6gre_net_id __read_mostly;
+struct ip6gre_net {
+	struct ip6_tnl __rcu *tunnels[4][HASH_SIZE];
+
+	struct net_device *fb_tunnel_dev;
+};
+
+static struct rtnl_link_ops ip6gre_link_ops __read_mostly;
+static int ip6gre_tunnel_init(struct net_device *dev);
+static void ip6gre_tunnel_setup(struct net_device *dev);
+static void ip6gre_tunnel_link(struct ip6gre_net *ign, struct ip6_tnl *t);
+static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu);
+
+/* Tunnel hash table */
+
+/*
+   4 hash tables:
+
+   3: (remote,local)
+   2: (remote,*)
+   1: (*,local)
+   0: (*,*)
+
+   We require exact key match i.e. if a key is present in packet
+   it will match only tunnel with the same key; if it is not present,
+   it will match only keyless tunnel.
+
+   All keysless packets, if not matched configured keyless tunnels
+   will match fallback tunnel.
+ */
+
+#define HASH_KEY(key) (((__force u32)key^((__force u32)key>>4))&(HASH_SIZE - 1))
+static u32 HASH_ADDR(const struct in6_addr *addr)
+{
+	u32 hash = ipv6_addr_hash(addr);
+
+	return hash_32(hash, HASH_SIZE_SHIFT);
+}
+
+#define tunnels_r_l	tunnels[3]
+#define tunnels_r	tunnels[2]
+#define tunnels_l	tunnels[1]
+#define tunnels_wc	tunnels[0]
+/*
+ * Locking : hash tables are protected by RCU and RTNL
+ */
+
+#define for_each_ip_tunnel_rcu(start) \
+	for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
+
+/* often modified stats are per cpu, other are shared (netdev->stats) */
+struct pcpu_tstats {
+	u64	rx_packets;
+	u64	rx_bytes;
+	u64	tx_packets;
+	u64	tx_bytes;
+	struct u64_stats_sync	syncp;
+};
+
+static struct rtnl_link_stats64 *ip6gre_get_stats64(struct net_device *dev,
+		struct rtnl_link_stats64 *tot)
+{
+	int i;
+
+	for_each_possible_cpu(i) {
+		const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i);
+		u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
+		unsigned int start;
+
+		do {
+			start = u64_stats_fetch_begin_bh(&tstats->syncp);
+			rx_packets = tstats->rx_packets;
+			tx_packets = tstats->tx_packets;
+			rx_bytes = tstats->rx_bytes;
+			tx_bytes = tstats->tx_bytes;
+		} while (u64_stats_fetch_retry_bh(&tstats->syncp, start));
+
+		tot->rx_packets += rx_packets;
+		tot->tx_packets += tx_packets;
+		tot->rx_bytes   += rx_bytes;
+		tot->tx_bytes   += tx_bytes;
+	}
+
+	tot->multicast = dev->stats.multicast;
+	tot->rx_crc_errors = dev->stats.rx_crc_errors;
+	tot->rx_fifo_errors = dev->stats.rx_fifo_errors;
+	tot->rx_length_errors = dev->stats.rx_length_errors;
+	tot->rx_errors = dev->stats.rx_errors;
+	tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
+	tot->tx_carrier_errors = dev->stats.tx_carrier_errors;
+	tot->tx_dropped = dev->stats.tx_dropped;
+	tot->tx_aborted_errors = dev->stats.tx_aborted_errors;
+	tot->tx_errors = dev->stats.tx_errors;
+
+	return tot;
+}
+
+/* Given src, dst and key, find appropriate for input tunnel. */
+
+static struct ip6_tnl *ip6gre_tunnel_lookup(struct net_device *dev,
+		const struct in6_addr *remote, const struct in6_addr *local,
+		__be32 key, __be16 gre_proto)
+{
+	struct net *net = dev_net(dev);
+	int link = dev->ifindex;
+	unsigned int h0 = HASH_ADDR(remote);
+	unsigned int h1 = HASH_KEY(key);
+	struct ip6_tnl *t, *cand = NULL;
+	struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
+	int dev_type = (gre_proto == htons(ETH_P_TEB)) ?
+		       ARPHRD_ETHER : ARPHRD_IP6GRE;
+	int score, cand_score = 4;
+
+	for_each_ip_tunnel_rcu(ign->tunnels_r_l[h0 ^ h1]) {
+		if (!ipv6_addr_equal(local, &t->parms.laddr) ||
+		    !ipv6_addr_equal(remote, &t->parms.raddr) ||
+		    key != t->parms.i_key ||
+		    !(t->dev->flags & IFF_UP))
+			continue;
+
+		if (t->dev->type != ARPHRD_IP6GRE &&
+		    t->dev->type != dev_type)
+			continue;
+
+		score = 0;
+		if (t->parms.link != link)
+			score |= 1;
+		if (t->dev->type != dev_type)
+			score |= 2;
+		if (score == 0)
+			return t;
+
+		if (score < cand_score) {
+			cand = t;
+			cand_score = score;
+		}
+	}
+
+	for_each_ip_tunnel_rcu(ign->tunnels_r[h0 ^ h1]) {
+		if (!ipv6_addr_equal(remote, &t->parms.raddr) ||
+		    key != t->parms.i_key ||
+		    !(t->dev->flags & IFF_UP))
+			continue;
+
+		if (t->dev->type != ARPHRD_IP6GRE &&
+		    t->dev->type != dev_type)
+			continue;
+
+		score = 0;
+		if (t->parms.link != link)
+			score |= 1;
+		if (t->dev->type != dev_type)
+			score |= 2;
+		if (score == 0)
+			return t;
+
+		if (score < cand_score) {
+			cand = t;
+			cand_score = score;
+		}
+	}
+
+	for_each_ip_tunnel_rcu(ign->tunnels_l[h1]) {
+		if ((!ipv6_addr_equal(local, &t->parms.laddr) &&
+			  (!ipv6_addr_equal(local, &t->parms.raddr) ||
+				 !ipv6_addr_is_multicast(local))) ||
+		    key != t->parms.i_key ||
+		    !(t->dev->flags & IFF_UP))
+			continue;
+
+		if (t->dev->type != ARPHRD_IP6GRE &&
+		    t->dev->type != dev_type)
+			continue;
+
+		score = 0;
+		if (t->parms.link != link)
+			score |= 1;
+		if (t->dev->type != dev_type)
+			score |= 2;
+		if (score == 0)
+			return t;
+
+		if (score < cand_score) {
+			cand = t;
+			cand_score = score;
+		}
+	}
+
+	for_each_ip_tunnel_rcu(ign->tunnels_wc[h1]) {
+		if (t->parms.i_key != key ||
+		    !(t->dev->flags & IFF_UP))
+			continue;
+
+		if (t->dev->type != ARPHRD_IP6GRE &&
+		    t->dev->type != dev_type)
+			continue;
+
+		score = 0;
+		if (t->parms.link != link)
+			score |= 1;
+		if (t->dev->type != dev_type)
+			score |= 2;
+		if (score == 0)
+			return t;
+
+		if (score < cand_score) {
+			cand = t;
+			cand_score = score;
+		}
+	}
+
+	if (cand != NULL)
+		return cand;
+
+	dev = ign->fb_tunnel_dev;
+	if (dev->flags & IFF_UP)
+		return netdev_priv(dev);
+
+	return NULL;
+}
+
+static struct ip6_tnl __rcu **__ip6gre_bucket(struct ip6gre_net *ign,
+		const struct __ip6_tnl_parm *p)
+{
+	const struct in6_addr *remote = &p->raddr;
+	const struct in6_addr *local = &p->laddr;
+	unsigned int h = HASH_KEY(p->i_key);
+	int prio = 0;
+
+	if (!ipv6_addr_any(local))
+		prio |= 1;
+	if (!ipv6_addr_any(remote) && !ipv6_addr_is_multicast(remote)) {
+		prio |= 2;
+		h ^= HASH_ADDR(remote);
+	}
+
+	return &ign->tunnels[prio][h];
+}
+
+static inline struct ip6_tnl __rcu **ip6gre_bucket(struct ip6gre_net *ign,
+		const struct ip6_tnl *t)
+{
+	return __ip6gre_bucket(ign, &t->parms);
+}
+
+static void ip6gre_tunnel_link(struct ip6gre_net *ign, struct ip6_tnl *t)
+{
+	struct ip6_tnl __rcu **tp = ip6gre_bucket(ign, t);
+
+	rcu_assign_pointer(t->next, rtnl_dereference(*tp));
+	rcu_assign_pointer(*tp, t);
+}
+
+static void ip6gre_tunnel_unlink(struct ip6gre_net *ign, struct ip6_tnl *t)
+{
+	struct ip6_tnl __rcu **tp;
+	struct ip6_tnl *iter;
+
+	for (tp = ip6gre_bucket(ign, t);
+	     (iter = rtnl_dereference(*tp)) != NULL;
+	     tp = &iter->next) {
+		if (t == iter) {
+			rcu_assign_pointer(*tp, t->next);
+			break;
+		}
+	}
+}
+
+static struct ip6_tnl *ip6gre_tunnel_find(struct net *net,
+					   const struct __ip6_tnl_parm *parms,
+					   int type)
+{
+	const struct in6_addr *remote = &parms->raddr;
+	const struct in6_addr *local = &parms->laddr;
+	__be32 key = parms->i_key;
+	int link = parms->link;
+	struct ip6_tnl *t;
+	struct ip6_tnl __rcu **tp;
+	struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
+
+	for (tp = __ip6gre_bucket(ign, parms);
+	     (t = rtnl_dereference(*tp)) != NULL;
+	     tp = &t->next)
+		if (ipv6_addr_equal(local, &t->parms.laddr) &&
+		    ipv6_addr_equal(remote, &t->parms.raddr) &&
+		    key == t->parms.i_key &&
+		    link == t->parms.link &&
+		    type == t->dev->type)
+			break;
+
+	return t;
+}
+
+static struct ip6_tnl *ip6gre_tunnel_locate(struct net *net,
+		const struct __ip6_tnl_parm *parms, int create)
+{
+	struct ip6_tnl *t, *nt;
+	struct net_device *dev;
+	char name[IFNAMSIZ];
+	struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
+
+	t = ip6gre_tunnel_find(net, parms, ARPHRD_IP6GRE);
+	if (t || !create)
+		return t;
+
+	if (parms->name[0])
+		strlcpy(name, parms->name, IFNAMSIZ);
+	else
+		strcpy(name, "ip6gre%d");
+
+	dev = alloc_netdev(sizeof(*t), name, ip6gre_tunnel_setup);
+	if (!dev)
+		return NULL;
+
+	dev_net_set(dev, net);
+
+	nt = netdev_priv(dev);
+	nt->parms = *parms;
+	dev->rtnl_link_ops = &ip6gre_link_ops;
+
+	nt->dev = dev;
+	ip6gre_tnl_link_config(nt, 1);
+
+	if (register_netdevice(dev) < 0)
+		goto failed_free;
+
+	/* Can use a lockless transmit, unless we generate output sequences */
+	if (!(nt->parms.o_flags & GRE_SEQ))
+		dev->features |= NETIF_F_LLTX;
+
+	dev_hold(dev);
+	ip6gre_tunnel_link(ign, nt);
+	return nt;
+
+failed_free:
+	free_netdev(dev);
+	return NULL;
+}
+
+static void ip6gre_tunnel_uninit(struct net_device *dev)
+{
+	struct net *net = dev_net(dev);
+	struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
+
+	ip6gre_tunnel_unlink(ign, netdev_priv(dev));
+	dev_put(dev);
+}
+
+
+static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+		u8 type, u8 code, int offset, __be32 info)
+{
+	const struct ipv6hdr *ipv6h = (const struct ipv6hdr *)skb->data;
+	__be16 *p = (__be16 *)(ipv6h + 1);
+	int grehlen = sizeof(ipv6h) + 4;
+	struct ip6_tnl *t;
+	__be16 flags;
+
+	flags = p[0];
+	if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
+		if (flags&(GRE_VERSION|GRE_ROUTING))
+			return;
+		if (flags&GRE_KEY) {
+			grehlen += 4;
+			if (flags&GRE_CSUM)
+				grehlen += 4;
+		}
+	}
+
+	/* If only 8 bytes returned, keyed message will be dropped here */
+	if (skb_headlen(skb) < grehlen)
+		return;
+
+	rcu_read_lock();
+
+	t = ip6gre_tunnel_lookup(skb->dev, &ipv6h->daddr, &ipv6h->saddr,
+				flags & GRE_KEY ?
+				*(((__be32 *)p) + (grehlen / 4) - 1) : 0,
+				p[1]);
+	if (t == NULL)
+		goto out;
+
+	switch (type) {
+		__u32 teli;
+		struct ipv6_tlv_tnl_enc_lim *tel;
+		__u32 mtu;
+	case ICMPV6_DEST_UNREACH:
+		net_warn_ratelimited("%s: Path to destination invalid or inactive!\n",
+				     t->parms.name);
+		break;
+	case ICMPV6_TIME_EXCEED:
+		if (code == ICMPV6_EXC_HOPLIMIT) {
+			net_warn_ratelimited("%s: Too small hop limit or routing loop in tunnel!\n",
+					     t->parms.name);
+		}
+		break;
+	case ICMPV6_PARAMPROB:
+		teli = 0;
+		if (code == ICMPV6_HDR_FIELD)
+			teli = ip6_tnl_parse_tlv_enc_lim(skb, skb->data);
+
+		if (teli && teli == info - 2) {
+			tel = (struct ipv6_tlv_tnl_enc_lim *) &skb->data[teli];
+			if (tel->encap_limit == 0) {
+				net_warn_ratelimited("%s: Too small encapsulation limit or routing loop in tunnel!\n",
+						     t->parms.name);
+			}
+		} else {
+			net_warn_ratelimited("%s: Recipient unable to parse tunneled packet!\n",
+					     t->parms.name);
+		}
+		break;
+	case ICMPV6_PKT_TOOBIG:
+		mtu = info - offset;
+		if (mtu < IPV6_MIN_MTU)
+			mtu = IPV6_MIN_MTU;
+		t->dev->mtu = mtu;
+		break;
+	}
+
+	if (time_before(jiffies, t->err_time + IP6TUNNEL_ERR_TIMEO))
+		t->err_count++;
+	else
+		t->err_count = 1;
+	t->err_time = jiffies;
+out:
+	rcu_read_unlock();
+}
+
+static inline void ip6gre_ecn_decapsulate_ipv4(const struct ip6_tnl *t,
+		const struct ipv6hdr *ipv6h, struct sk_buff *skb)
+{
+	__u8 dsfield = ipv6_get_dsfield(ipv6h) & ~INET_ECN_MASK;
+
+	if (t->parms.flags & IP6_TNL_F_RCV_DSCP_COPY)
+		ipv4_change_dsfield(ip_hdr(skb), INET_ECN_MASK, dsfield);
+
+	if (INET_ECN_is_ce(dsfield))
+		IP_ECN_set_ce(ip_hdr(skb));
+}
+
+static inline void ip6gre_ecn_decapsulate_ipv6(const struct ip6_tnl *t,
+		const struct ipv6hdr *ipv6h, struct sk_buff *skb)
+{
+	if (t->parms.flags & IP6_TNL_F_RCV_DSCP_COPY)
+		ipv6_copy_dscp(ipv6_get_dsfield(ipv6h), ipv6_hdr(skb));
+
+	if (INET_ECN_is_ce(ipv6_get_dsfield(ipv6h)))
+		IP6_ECN_set_ce(ipv6_hdr(skb));
+}
+
+static int ip6gre_rcv(struct sk_buff *skb)
+{
+	const struct ipv6hdr *ipv6h;
+	u8     *h;
+	__be16    flags;
+	__sum16   csum = 0;
+	__be32 key = 0;
+	u32    seqno = 0;
+	struct ip6_tnl *tunnel;
+	int    offset = 4;
+	__be16 gre_proto;
+
+	if (!pskb_may_pull(skb, sizeof(struct in6_addr)))
+		goto drop_nolock;
+
+	ipv6h = ipv6_hdr(skb);
+	h = skb->data;
+	flags = *(__be16 *)h;
+
+	if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) {
+		/* - Version must be 0.
+		   - We do not support routing headers.
+		 */
+		if (flags&(GRE_VERSION|GRE_ROUTING))
+			goto drop_nolock;
+
+		if (flags&GRE_CSUM) {
+			switch (skb->ip_summed) {
+			case CHECKSUM_COMPLETE:
+				csum = csum_fold(skb->csum);
+				if (!csum)
+					break;
+				/* fall through */
+			case CHECKSUM_NONE:
+				skb->csum = 0;
+				csum = __skb_checksum_complete(skb);
+				skb->ip_summed = CHECKSUM_COMPLETE;
+			}
+			offset += 4;
+		}
+		if (flags&GRE_KEY) {
+			key = *(__be32 *)(h + offset);
+			offset += 4;
+		}
+		if (flags&GRE_SEQ) {
+			seqno = ntohl(*(__be32 *)(h + offset));
+			offset += 4;
+		}
+	}
+
+	gre_proto = *(__be16 *)(h + 2);
+
+	rcu_read_lock();
+	tunnel = ip6gre_tunnel_lookup(skb->dev,
+					  &ipv6h->saddr, &ipv6h->daddr, key,
+					  gre_proto);
+	if (tunnel) {
+		struct pcpu_tstats *tstats;
+
+		if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
+			goto drop;
+
+		if (!ip6_tnl_rcv_ctl(tunnel, &ipv6h->daddr, &ipv6h->saddr)) {
+			tunnel->dev->stats.rx_dropped++;
+			goto drop;
+		}
+
+		secpath_reset(skb);
+
+		skb->protocol = gre_proto;
+		/* WCCP version 1 and 2 protocol decoding.
+		 * - Change protocol to IP
+		 * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
+		 */
+		if (flags == 0 && gre_proto == htons(ETH_P_WCCP)) {
+			skb->protocol = htons(ETH_P_IP);
+			if ((*(h + offset) & 0xF0) != 0x40)
+				offset += 4;
+		}
+
+		skb->mac_header = skb->network_header;
+		__pskb_pull(skb, offset);
+		skb_postpull_rcsum(skb, skb_transport_header(skb), offset);
+		skb->pkt_type = PACKET_HOST;
+
+		if (((flags&GRE_CSUM) && csum) ||
+		    (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) {
+			tunnel->dev->stats.rx_crc_errors++;
+			tunnel->dev->stats.rx_errors++;
+			goto drop;
+		}
+		if (tunnel->parms.i_flags&GRE_SEQ) {
+			if (!(flags&GRE_SEQ) ||
+			    (tunnel->i_seqno &&
+					(s32)(seqno - tunnel->i_seqno) < 0)) {
+				tunnel->dev->stats.rx_fifo_errors++;
+				tunnel->dev->stats.rx_errors++;
+				goto drop;
+			}
+			tunnel->i_seqno = seqno + 1;
+		}
+
+		/* Warning: All skb pointers will be invalidated! */
+		if (tunnel->dev->type == ARPHRD_ETHER) {
+			if (!pskb_may_pull(skb, ETH_HLEN)) {
+				tunnel->dev->stats.rx_length_errors++;
+				tunnel->dev->stats.rx_errors++;
+				goto drop;
+			}
+
+			ipv6h = ipv6_hdr(skb);
+			skb->protocol = eth_type_trans(skb, tunnel->dev);
+			skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
+		}
+
+		tstats = this_cpu_ptr(tunnel->dev->tstats);
+		u64_stats_update_begin(&tstats->syncp);
+		tstats->rx_packets++;
+		tstats->rx_bytes += skb->len;
+		u64_stats_update_end(&tstats->syncp);
+
+		__skb_tunnel_rx(skb, tunnel->dev);
+
+		skb_reset_network_header(skb);
+		if (skb->protocol == htons(ETH_P_IP))
+			ip6gre_ecn_decapsulate_ipv4(tunnel, ipv6h, skb);
+		else if (skb->protocol == htons(ETH_P_IPV6))
+			ip6gre_ecn_decapsulate_ipv6(tunnel, ipv6h, skb);
+
+		netif_rx(skb);
+
+		rcu_read_unlock();
+		return 0;
+	}
+	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0);
+
+drop:
+	rcu_read_unlock();
+drop_nolock:
+	kfree_skb(skb);
+	return 0;
+}
+
+struct ipv6_tel_txoption {
+	struct ipv6_txoptions ops;
+	__u8 dst_opt[8];
+};
+
+static void init_tel_txopt(struct ipv6_tel_txoption *opt, __u8 encap_limit)
+{
+	memset(opt, 0, sizeof(struct ipv6_tel_txoption));
+
+	opt->dst_opt[2] = IPV6_TLV_TNL_ENCAP_LIMIT;
+	opt->dst_opt[3] = 1;
+	opt->dst_opt[4] = encap_limit;
+	opt->dst_opt[5] = IPV6_TLV_PADN;
+	opt->dst_opt[6] = 1;
+
+	opt->ops.dst0opt = (struct ipv6_opt_hdr *) opt->dst_opt;
+	opt->ops.opt_nflen = 8;
+}
+
+static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb,
+			 struct net_device *dev,
+			 __u8 dsfield,
+			 struct flowi6 *fl6,
+			 int encap_limit,
+			 __u32 *pmtu)
+{
+	struct net *net = dev_net(dev);
+	struct ip6_tnl *tunnel = netdev_priv(dev);
+	struct net_device *tdev;    /* Device to other host */
+	struct ipv6hdr  *ipv6h;     /* Our new IP header */
+	unsigned int max_headroom;  /* The extra header space needed */
+	int    gre_hlen;
+	struct ipv6_tel_txoption opt;
+	int    mtu;
+	struct dst_entry *dst = NULL, *ndst = NULL;
+	struct net_device_stats *stats = &tunnel->dev->stats;
+	int err = -1;
+	u8 proto;
+	int pkt_len;
+	struct sk_buff *new_skb;
+
+	if (dev->type == ARPHRD_ETHER)
+		IPCB(skb)->flags = 0;
+
+	if (dev->header_ops && dev->type == ARPHRD_IP6GRE) {
+		gre_hlen = 0;
+		ipv6h = (struct ipv6hdr *)skb->data;
+		fl6->daddr = ipv6h->daddr;
+	} else {
+		gre_hlen = tunnel->hlen;
+		fl6->daddr = tunnel->parms.raddr;
+	}
+
+	if (!fl6->flowi6_mark)
+		dst = ip6_tnl_dst_check(tunnel);
+
+	if (!dst) {
+		ndst = ip6_route_output(net, NULL, fl6);
+
+		if (ndst->error)
+			goto tx_err_link_failure;
+		ndst = xfrm_lookup(net, ndst, flowi6_to_flowi(fl6), NULL, 0);
+		if (IS_ERR(ndst)) {
+			err = PTR_ERR(ndst);
+			ndst = NULL;
+			goto tx_err_link_failure;
+		}
+		dst = ndst;
+	}
+
+	tdev = dst->dev;
+
+	if (tdev == dev) {
+		stats->collisions++;
+		net_warn_ratelimited("%s: Local routing loop detected!\n",
+				     tunnel->parms.name);
+		goto tx_err_dst_release;
+	}
+
+	mtu = dst_mtu(dst) - sizeof(*ipv6h);
+	if (encap_limit >= 0) {
+		max_headroom += 8;
+		mtu -= 8;
+	}
+	if (mtu < IPV6_MIN_MTU)
+		mtu = IPV6_MIN_MTU;
+	if (skb_dst(skb))
+		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
+	if (skb->len > mtu) {
+		*pmtu = mtu;
+		err = -EMSGSIZE;
+		goto tx_err_dst_release;
+	}
+
+	if (tunnel->err_count > 0) {
+		if (time_before(jiffies,
+				tunnel->err_time + IP6TUNNEL_ERR_TIMEO)) {
+			tunnel->err_count--;
+
+			dst_link_failure(skb);
+		} else
+			tunnel->err_count = 0;
+	}
+
+	max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen + dst->header_len;
+
+	if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
+	    (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
+		new_skb = skb_realloc_headroom(skb, max_headroom);
+		if (max_headroom > dev->needed_headroom)
+			dev->needed_headroom = max_headroom;
+		if (!new_skb)
+			goto tx_err_dst_release;
+
+		if (skb->sk)
+			skb_set_owner_w(new_skb, skb->sk);
+		consume_skb(skb);
+		skb = new_skb;
+	}
+
+	skb_dst_drop(skb);
+
+	if (fl6->flowi6_mark) {
+		skb_dst_set(skb, dst);
+		ndst = NULL;
+	} else {
+		skb_dst_set_noref(skb, dst);
+	}
+
+	skb->transport_header = skb->network_header;
+
+	proto = NEXTHDR_GRE;
+	if (encap_limit >= 0) {
+		init_tel_txopt(&opt, encap_limit);
+		ipv6_push_nfrag_opts(skb, &opt.ops, &proto, NULL);
+	}
+
+	skb_push(skb, gre_hlen);
+	skb_reset_network_header(skb);
+
+	/*
+	 *	Push down and install the IP header.
+	 */
+	ipv6h = ipv6_hdr(skb);
+	*(__be32 *)ipv6h = fl6->flowlabel | htonl(0x60000000);
+	dsfield = INET_ECN_encapsulate(0, dsfield);
+	ipv6_change_dsfield(ipv6h, ~INET_ECN_MASK, dsfield);
+	ipv6h->hop_limit = tunnel->parms.hop_limit;
+	ipv6h->nexthdr = proto;
+	ipv6h->saddr = fl6->saddr;
+	ipv6h->daddr = fl6->daddr;
+
+	((__be16 *)(ipv6h + 1))[0] = tunnel->parms.o_flags;
+	((__be16 *)(ipv6h + 1))[1] = (dev->type == ARPHRD_ETHER) ?
+				   htons(ETH_P_TEB) : skb->protocol;
+
+	if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) {
+		__be32 *ptr = (__be32 *)(((u8 *)ipv6h) + tunnel->hlen - 4);
+
+		if (tunnel->parms.o_flags&GRE_SEQ) {
+			++tunnel->o_seqno;
+			*ptr = htonl(tunnel->o_seqno);
+			ptr--;
+		}
+		if (tunnel->parms.o_flags&GRE_KEY) {
+			*ptr = tunnel->parms.o_key;
+			ptr--;
+		}
+		if (tunnel->parms.o_flags&GRE_CSUM) {
+			*ptr = 0;
+			*(__sum16 *)ptr = ip_compute_csum((void *)(ipv6h+1),
+				skb->len - sizeof(struct ipv6hdr));
+		}
+	}
+
+	nf_reset(skb);
+	pkt_len = skb->len;
+	err = ip6_local_out(skb);
+
+	if (net_xmit_eval(err) == 0) {
+		struct pcpu_tstats *tstats = this_cpu_ptr(tunnel->dev->tstats);
+
+		tstats->tx_bytes += pkt_len;
+		tstats->tx_packets++;
+	} else {
+		stats->tx_errors++;
+		stats->tx_aborted_errors++;
+	}
+
+	if (ndst)
+		ip6_tnl_dst_store(tunnel, ndst);
+
+	return 0;
+tx_err_link_failure:
+	stats->tx_carrier_errors++;
+	dst_link_failure(skb);
+tx_err_dst_release:
+	dst_release(ndst);
+	return err;
+}
+
+static inline int ip6gre_xmit_ipv4(struct sk_buff *skb, struct net_device *dev)
+{
+	struct ip6_tnl *t = netdev_priv(dev);
+	const struct iphdr  *iph = ip_hdr(skb);
+	int encap_limit = -1;
+	struct flowi6 fl6;
+	__u8 dsfield;
+	__u32 mtu;
+	int err;
+
+	if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
+		encap_limit = t->parms.encap_limit;
+
+	memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
+	fl6.flowi6_proto = IPPROTO_IPIP;
+
+	dsfield = ipv4_get_dsfield(iph);
+
+	if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
+		fl6.flowlabel |= htonl((__u32)iph->tos << IPV6_TCLASS_SHIFT)
+					  & IPV6_TCLASS_MASK;
+	if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
+		fl6.flowi6_mark = skb->mark;
+
+	err = ip6gre_xmit2(skb, dev, dsfield, &fl6, encap_limit, &mtu);
+	if (err != 0) {
+		/* XXX: send ICMP error even if DF is not set. */
+		if (err == -EMSGSIZE)
+			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
+				  htonl(mtu));
+		return -1;
+	}
+
+	return 0;
+}
+
+static inline int ip6gre_xmit_ipv6(struct sk_buff *skb, struct net_device *dev)
+{
+	struct ip6_tnl *t = netdev_priv(dev);
+	struct ipv6hdr *ipv6h = ipv6_hdr(skb);
+	int encap_limit = -1;
+	__u16 offset;
+	struct flowi6 fl6;
+	__u8 dsfield;
+	__u32 mtu;
+	int err;
+
+	if (ipv6_addr_equal(&t->parms.raddr, &ipv6h->saddr))
+		return -1;
+
+	offset = ip6_tnl_parse_tlv_enc_lim(skb, skb_network_header(skb));
+	if (offset > 0) {
+		struct ipv6_tlv_tnl_enc_lim *tel;
+		tel = (struct ipv6_tlv_tnl_enc_lim *)&skb_network_header(skb)[offset];
+		if (tel->encap_limit == 0) {
+			icmpv6_send(skb, ICMPV6_PARAMPROB,
+				    ICMPV6_HDR_FIELD, offset + 2);
+			return -1;
+		}
+		encap_limit = tel->encap_limit - 1;
+	} else if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
+		encap_limit = t->parms.encap_limit;
+
+	memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
+	fl6.flowi6_proto = IPPROTO_IPV6;
+
+	dsfield = ipv6_get_dsfield(ipv6h);
+	if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
+		fl6.flowlabel |= (*(__be32 *) ipv6h & IPV6_TCLASS_MASK);
+	if (t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL)
+		fl6.flowlabel |= (*(__be32 *) ipv6h & IPV6_FLOWLABEL_MASK);
+	if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
+		fl6.flowi6_mark = skb->mark;
+
+	err = ip6gre_xmit2(skb, dev, dsfield, &fl6, encap_limit, &mtu);
+	if (err != 0) {
+		if (err == -EMSGSIZE)
+			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
+		return -1;
+	}
+
+	return 0;
+}
+
+/**
+ * ip6_tnl_addr_conflict - compare packet addresses to tunnel's own
+ *   @t: the outgoing tunnel device
+ *   @hdr: IPv6 header from the incoming packet
+ *
+ * Description:
+ *   Avoid trivial tunneling loop by checking that tunnel exit-point
+ *   doesn't match source of incoming packet.
+ *
+ * Return:
+ *   1 if conflict,
+ *   0 else
+ **/
+
+static inline bool ip6gre_tnl_addr_conflict(const struct ip6_tnl *t,
+	const struct ipv6hdr *hdr)
+{
+	return ipv6_addr_equal(&t->parms.raddr, &hdr->saddr);
+}
+
+static int ip6gre_xmit_other(struct sk_buff *skb, struct net_device *dev)
+{
+	struct ip6_tnl *t = netdev_priv(dev);
+	int encap_limit = -1;
+	struct flowi6 fl6;
+	__u32 mtu;
+	int err;
+
+	if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
+		encap_limit = t->parms.encap_limit;
+
+	memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
+	fl6.flowi6_proto = skb->protocol;
+
+	err = ip6gre_xmit2(skb, dev, 0, &fl6, encap_limit, &mtu);
+
+	return err;
+}
+
+static netdev_tx_t ip6gre_tunnel_xmit(struct sk_buff *skb,
+	struct net_device *dev)
+{
+	struct ip6_tnl *t = netdev_priv(dev);
+	struct net_device_stats *stats = &t->dev->stats;
+	int ret;
+
+	if (!ip6_tnl_xmit_ctl(t))
+		return -1;
+
+	switch (skb->protocol) {
+	case htons(ETH_P_IP):
+		ret = ip6gre_xmit_ipv4(skb, dev);
+		break;
+	case htons(ETH_P_IPV6):
+		ret = ip6gre_xmit_ipv6(skb, dev);
+		break;
+	default:
+		ret = ip6gre_xmit_other(skb, dev);
+		break;
+	}
+
+	if (ret < 0)
+		goto tx_err;
+
+	return NETDEV_TX_OK;
+
+tx_err:
+	stats->tx_errors++;
+	stats->tx_dropped++;
+	kfree_skb(skb);
+	return NETDEV_TX_OK;
+}
+
+static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu)
+{
+	struct net_device *dev = t->dev;
+	struct __ip6_tnl_parm *p = &t->parms;
+	struct flowi6 *fl6 = &t->fl.u.ip6;
+	int addend = sizeof(struct ipv6hdr) + 4;
+
+	if (dev->type != ARPHRD_ETHER) {
+		memcpy(dev->dev_addr, &p->laddr, sizeof(struct in6_addr));
+		memcpy(dev->broadcast, &p->raddr, sizeof(struct in6_addr));
+	}
+
+	/* Set up flowi template */
+	fl6->saddr = p->laddr;
+	fl6->daddr = p->raddr;
+	fl6->flowi6_oif = p->link;
+	fl6->flowlabel = 0;
+
+	if (!(p->flags&IP6_TNL_F_USE_ORIG_TCLASS))
+		fl6->flowlabel |= IPV6_TCLASS_MASK & p->flowinfo;
+	if (!(p->flags&IP6_TNL_F_USE_ORIG_FLOWLABEL))
+		fl6->flowlabel |= IPV6_FLOWLABEL_MASK & p->flowinfo;
+
+	p->flags &= ~(IP6_TNL_F_CAP_XMIT|IP6_TNL_F_CAP_RCV|IP6_TNL_F_CAP_PER_PACKET);
+	p->flags |= ip6_tnl_get_cap(t, &p->laddr, &p->raddr);
+
+	if (p->flags&IP6_TNL_F_CAP_XMIT &&
+			p->flags&IP6_TNL_F_CAP_RCV && dev->type != ARPHRD_ETHER)
+		dev->flags |= IFF_POINTOPOINT;
+	else
+		dev->flags &= ~IFF_POINTOPOINT;
+
+	dev->iflink = p->link;
+
+	/* Precalculate GRE options length */
+	if (t->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) {
+		if (t->parms.o_flags&GRE_CSUM)
+			addend += 4;
+		if (t->parms.o_flags&GRE_KEY)
+			addend += 4;
+		if (t->parms.o_flags&GRE_SEQ)
+			addend += 4;
+	}
+
+	if (p->flags & IP6_TNL_F_CAP_XMIT) {
+		int strict = (ipv6_addr_type(&p->raddr) &
+			      (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL));
+
+		struct rt6_info *rt = rt6_lookup(dev_net(dev),
+						 &p->raddr, &p->laddr,
+						 p->link, strict);
+
+		if (rt == NULL)
+			return;
+
+		if (rt->dst.dev) {
+			dev->hard_header_len = rt->dst.dev->hard_header_len + addend;
+
+			if (set_mtu) {
+				dev->mtu = rt->dst.dev->mtu - addend;
+				if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
+					dev->mtu -= 8;
+
+				if (dev->mtu < IPV6_MIN_MTU)
+					dev->mtu = IPV6_MIN_MTU;
+			}
+		}
+		dst_release(&rt->dst);
+	}
+
+	t->hlen = addend;
+}
+
+static int ip6gre_tnl_change(struct ip6_tnl *t,
+	const struct __ip6_tnl_parm *p, int set_mtu)
+{
+	t->parms.laddr = p->laddr;
+	t->parms.raddr = p->raddr;
+	t->parms.flags = p->flags;
+	t->parms.hop_limit = p->hop_limit;
+	t->parms.encap_limit = p->encap_limit;
+	t->parms.flowinfo = p->flowinfo;
+	t->parms.link = p->link;
+	t->parms.proto = p->proto;
+	t->parms.i_key = p->i_key;
+	t->parms.o_key = p->o_key;
+	t->parms.i_flags = p->i_flags;
+	t->parms.o_flags = p->o_flags;
+	ip6_tnl_dst_reset(t);
+	ip6gre_tnl_link_config(t, set_mtu);
+	return 0;
+}
+
+static void ip6gre_tnl_parm_from_user(struct __ip6_tnl_parm *p,
+	const struct ip6_tnl_parm2 *u)
+{
+	p->laddr = u->laddr;
+	p->raddr = u->raddr;
+	p->flags = u->flags;
+	p->hop_limit = u->hop_limit;
+	p->encap_limit = u->encap_limit;
+	p->flowinfo = u->flowinfo;
+	p->link = u->link;
+	p->i_key = u->i_key;
+	p->o_key = u->o_key;
+	p->i_flags = u->i_flags;
+	p->o_flags = u->o_flags;
+	memcpy(p->name, u->name, sizeof(u->name));
+}
+
+static void ip6gre_tnl_parm_to_user(struct ip6_tnl_parm2 *u,
+	const struct __ip6_tnl_parm *p)
+{
+	u->proto = IPPROTO_GRE;
+	u->laddr = p->laddr;
+	u->raddr = p->raddr;
+	u->flags = p->flags;
+	u->hop_limit = p->hop_limit;
+	u->encap_limit = p->encap_limit;
+	u->flowinfo = p->flowinfo;
+	u->link = p->link;
+	u->i_key = p->i_key;
+	u->o_key = p->o_key;
+	u->i_flags = p->i_flags;
+	u->o_flags = p->o_flags;
+	memcpy(u->name, p->name, sizeof(u->name));
+}
+
+static int ip6gre_tunnel_ioctl(struct net_device *dev,
+	struct ifreq *ifr, int cmd)
+{
+	int err = 0;
+	struct ip6_tnl_parm2 p;
+	struct __ip6_tnl_parm p1;
+	struct ip6_tnl *t;
+	struct net *net = dev_net(dev);
+	struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
+
+	switch (cmd) {
+	case SIOCGETTUNNEL:
+		t = NULL;
+		if (dev == ign->fb_tunnel_dev) {
+			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
+				err = -EFAULT;
+				break;
+			}
+			ip6gre_tnl_parm_from_user(&p1, &p);
+			t = ip6gre_tunnel_locate(net, &p1, 0);
+		}
+		if (t == NULL)
+			t = netdev_priv(dev);
+		ip6gre_tnl_parm_to_user(&p, &t->parms);
+		if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
+			err = -EFAULT;
+		break;
+
+	case SIOCADDTUNNEL:
+	case SIOCCHGTUNNEL:
+		err = -EPERM;
+		if (!capable(CAP_NET_ADMIN))
+			goto done;
+
+		err = -EFAULT;
+		if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
+			goto done;
+
+		err = -EINVAL;
+		if ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING))
+			goto done;
+
+		if (!(p.i_flags&GRE_KEY))
+			p.i_key = 0;
+		if (!(p.o_flags&GRE_KEY))
+			p.o_key = 0;
+
+		ip6gre_tnl_parm_from_user(&p1, &p);
+		t = ip6gre_tunnel_locate(net, &p1, cmd == SIOCADDTUNNEL);
+
+		if (dev != ign->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
+			if (t != NULL) {
+				if (t->dev != dev) {
+					err = -EEXIST;
+					break;
+				}
+			} else {
+				t = netdev_priv(dev);
+
+				ip6gre_tunnel_unlink(ign, t);
+				synchronize_net();
+				ip6gre_tnl_change(t, &p1, 1);
+				ip6gre_tunnel_link(ign, t);
+				netdev_state_change(dev);
+			}
+		}
+
+		if (t) {
+			err = 0;
+
+			ip6gre_tnl_parm_to_user(&p, &t->parms);
+			if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
+				err = -EFAULT;
+		} else
+			err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
+		break;
+
+	case SIOCDELTUNNEL:
+		err = -EPERM;
+		if (!capable(CAP_NET_ADMIN))
+			goto done;
+
+		if (dev == ign->fb_tunnel_dev) {
+			err = -EFAULT;
+			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
+				goto done;
+			err = -ENOENT;
+			ip6gre_tnl_parm_from_user(&p1, &p);
+			t = ip6gre_tunnel_locate(net, &p1, 0);
+			if (t == NULL)
+				goto done;
+			err = -EPERM;
+			if (t == netdev_priv(ign->fb_tunnel_dev))
+				goto done;
+			dev = t->dev;
+		}
+		unregister_netdevice(dev);
+		err = 0;
+		break;
+
+	default:
+		err = -EINVAL;
+	}
+
+done:
+	return err;
+}
+
+static int ip6gre_tunnel_change_mtu(struct net_device *dev, int new_mtu)
+{
+	struct ip6_tnl *tunnel = netdev_priv(dev);
+	if (new_mtu < 68 ||
+	    new_mtu > 0xFFF8 - dev->hard_header_len - tunnel->hlen)
+		return -EINVAL;
+	dev->mtu = new_mtu;
+	return 0;
+}
+
+static int ip6gre_header(struct sk_buff *skb, struct net_device *dev,
+			unsigned short type,
+			const void *daddr, const void *saddr, unsigned int len)
+{
+	struct ip6_tnl *t = netdev_priv(dev);
+	struct ipv6hdr *ipv6h = (struct ipv6hdr *)skb_push(skb, t->hlen);
+	__be16 *p = (__be16 *)(ipv6h+1);
+
+	*(__be32 *)ipv6h = t->fl.u.ip6.flowlabel | htonl(0x60000000);
+	ipv6h->hop_limit = t->parms.hop_limit;
+	ipv6h->nexthdr = NEXTHDR_GRE;
+	ipv6h->saddr = t->parms.laddr;
+	ipv6h->daddr = t->parms.raddr;
+
+	p[0]		= t->parms.o_flags;
+	p[1]		= htons(type);
+
+	/*
+	 *	Set the source hardware address.
+	 */
+
+	if (saddr)
+		memcpy(&ipv6h->saddr, saddr, sizeof(struct in6_addr));
+	if (daddr)
+		memcpy(&ipv6h->daddr, daddr, sizeof(struct in6_addr));
+	if (!ipv6_addr_any(&ipv6h->daddr))
+		return t->hlen;
+
+	return -t->hlen;
+}
+
+static int ip6gre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
+{
+	const struct ipv6hdr *ipv6h = (const struct ipv6hdr *)skb_mac_header(skb);
+	memcpy(haddr, &ipv6h->saddr, sizeof(struct in6_addr));
+	return sizeof(struct in6_addr);
+}
+
+static const struct header_ops ip6gre_header_ops = {
+	.create	= ip6gre_header,
+	.parse	= ip6gre_header_parse,
+};
+
+static const struct net_device_ops ip6gre_netdev_ops = {
+	.ndo_init		= ip6gre_tunnel_init,
+	.ndo_uninit		= ip6gre_tunnel_uninit,
+	.ndo_start_xmit		= ip6gre_tunnel_xmit,
+	.ndo_do_ioctl		= ip6gre_tunnel_ioctl,
+	.ndo_change_mtu		= ip6gre_tunnel_change_mtu,
+	.ndo_get_stats64	= ip6gre_get_stats64,
+};
+
+static void ip6gre_dev_free(struct net_device *dev)
+{
+	free_percpu(dev->tstats);
+	free_netdev(dev);
+}
+
+static void ip6gre_tunnel_setup(struct net_device *dev)
+{
+	struct ip6_tnl *t;
+
+	dev->netdev_ops = &ip6gre_netdev_ops;
+	dev->destructor = ip6gre_dev_free;
+
+	dev->type = ARPHRD_IP6GRE;
+	dev->hard_header_len = LL_MAX_HEADER + sizeof(struct ipv6hdr) + 4;
+	dev->mtu = ETH_DATA_LEN - sizeof(struct ipv6hdr) - 4;
+	t = netdev_priv(dev);
+	if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
+		dev->mtu -= 8;
+	dev->flags |= IFF_NOARP;
+	dev->iflink = 0;
+	dev->addr_len = sizeof(struct in6_addr);
+	dev->features |= NETIF_F_NETNS_LOCAL;
+	dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
+}
+
+static int ip6gre_tunnel_init(struct net_device *dev)
+{
+	struct ip6_tnl *tunnel;
+
+	tunnel = netdev_priv(dev);
+
+	tunnel->dev = dev;
+	strcpy(tunnel->parms.name, dev->name);
+
+	memcpy(dev->dev_addr, &tunnel->parms.laddr, sizeof(struct in6_addr));
+	memcpy(dev->broadcast, &tunnel->parms.raddr, sizeof(struct in6_addr));
+
+	if (ipv6_addr_any(&tunnel->parms.raddr))
+		dev->header_ops = &ip6gre_header_ops;
+
+	dev->tstats = alloc_percpu(struct pcpu_tstats);
+	if (!dev->tstats)
+		return -ENOMEM;
+
+	return 0;
+}
+
+static void ip6gre_fb_tunnel_init(struct net_device *dev)
+{
+	struct ip6_tnl *tunnel = netdev_priv(dev);
+
+	tunnel->dev = dev;
+	strcpy(tunnel->parms.name, dev->name);
+
+	tunnel->hlen		= sizeof(struct ipv6hdr) + 4;
+
+	dev_hold(dev);
+}
+
+
+static struct inet6_protocol ip6gre_protocol __read_mostly = {
+	.handler     = ip6gre_rcv,
+	.err_handler = ip6gre_err,
+	.flags       = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
+};
+
+static void ip6gre_destroy_tunnels(struct ip6gre_net *ign,
+	struct list_head *head)
+{
+	int prio;
+
+	for (prio = 0; prio < 4; prio++) {
+		int h;
+		for (h = 0; h < HASH_SIZE; h++) {
+			struct ip6_tnl *t;
+
+			t = rtnl_dereference(ign->tunnels[prio][h]);
+
+			while (t != NULL) {
+				unregister_netdevice_queue(t->dev, head);
+				t = rtnl_dereference(t->next);
+			}
+		}
+	}
+}
+
+static int __net_init ip6gre_init_net(struct net *net)
+{
+	struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
+	int err;
+
+	ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip6_tnl), "ip6gre0",
+					   ip6gre_tunnel_setup);
+	if (!ign->fb_tunnel_dev) {
+		err = -ENOMEM;
+		goto err_alloc_dev;
+	}
+	dev_net_set(ign->fb_tunnel_dev, net);
+
+	ip6gre_fb_tunnel_init(ign->fb_tunnel_dev);
+	ign->fb_tunnel_dev->rtnl_link_ops = &ip6gre_link_ops;
+
+	err = register_netdev(ign->fb_tunnel_dev);
+	if (err)
+		goto err_reg_dev;
+
+	rcu_assign_pointer(ign->tunnels_wc[0],
+			   netdev_priv(ign->fb_tunnel_dev));
+	return 0;
+
+err_reg_dev:
+	ip6gre_dev_free(ign->fb_tunnel_dev);
+err_alloc_dev:
+	return err;
+}
+
+static void __net_exit ip6gre_exit_net(struct net *net)
+{
+	struct ip6gre_net *ign;
+	LIST_HEAD(list);
+
+	ign = net_generic(net, ip6gre_net_id);
+	rtnl_lock();
+	ip6gre_destroy_tunnels(ign, &list);
+	unregister_netdevice_many(&list);
+	rtnl_unlock();
+}
+
+static struct pernet_operations ip6gre_net_ops = {
+	.init = ip6gre_init_net,
+	.exit = ip6gre_exit_net,
+	.id   = &ip6gre_net_id,
+	.size = sizeof(struct ip6gre_net),
+};
+
+static int ip6gre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
+{
+	__be16 flags;
+
+	if (!data)
+		return 0;
+
+	flags = 0;
+	if (data[IFLA_GRE_IFLAGS])
+		flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
+	if (data[IFLA_GRE_OFLAGS])
+		flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
+	if (flags & (GRE_VERSION|GRE_ROUTING))
+		return -EINVAL;
+
+	return 0;
+}
+
+static int ip6gre_tap_validate(struct nlattr *tb[], struct nlattr *data[])
+{
+	struct in6_addr daddr;
+
+	if (tb[IFLA_ADDRESS]) {
+		if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
+			return -EINVAL;
+		if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
+			return -EADDRNOTAVAIL;
+	}
+
+	if (!data)
+		goto out;
+
+	if (data[IFLA_GRE_REMOTE]) {
+		nla_memcpy(&daddr, data[IFLA_GRE_REMOTE], sizeof(struct in6_addr));
+		if (ipv6_addr_any(&daddr))
+			return -EINVAL;
+	}
+
+out:
+	return ip6gre_tunnel_validate(tb, data);
+}
+
+
+static void ip6gre_netlink_parms(struct nlattr *data[],
+				struct __ip6_tnl_parm *parms)
+{
+	memset(parms, 0, sizeof(*parms));
+
+	if (!data)
+		return;
+
+	if (data[IFLA_GRE_LINK])
+		parms->link = nla_get_u32(data[IFLA_GRE_LINK]);
+
+	if (data[IFLA_GRE_IFLAGS])
+		parms->i_flags = nla_get_be16(data[IFLA_GRE_IFLAGS]);
+
+	if (data[IFLA_GRE_OFLAGS])
+		parms->o_flags = nla_get_be16(data[IFLA_GRE_OFLAGS]);
+
+	if (data[IFLA_GRE_IKEY])
+		parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);
+
+	if (data[IFLA_GRE_OKEY])
+		parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);
+
+	if (data[IFLA_GRE_LOCAL])
+		nla_memcpy(&parms->laddr, data[IFLA_GRE_LOCAL], sizeof(struct in6_addr));
+
+	if (data[IFLA_GRE_REMOTE])
+		nla_memcpy(&parms->raddr, data[IFLA_GRE_REMOTE], sizeof(struct in6_addr));
+
+	if (data[IFLA_GRE_TTL])
+		parms->hop_limit = nla_get_u8(data[IFLA_GRE_TTL]);
+
+	if (data[IFLA_GRE_ENCAP_LIMIT])
+		parms->encap_limit = nla_get_u8(data[IFLA_GRE_ENCAP_LIMIT]);
+
+	if (data[IFLA_GRE_FLOWINFO])
+		parms->flowinfo = nla_get_u32(data[IFLA_GRE_FLOWINFO]);
+
+	if (data[IFLA_GRE_FLAGS])
+		parms->flags = nla_get_u32(data[IFLA_GRE_FLAGS]);
+}
+
+static int ip6gre_tap_init(struct net_device *dev)
+{
+	struct ip6_tnl *tunnel;
+
+	tunnel = netdev_priv(dev);
+
+	tunnel->dev = dev;
+	strcpy(tunnel->parms.name, dev->name);
+
+	ip6gre_tnl_link_config(tunnel, 1);
+
+	dev->tstats = alloc_percpu(struct pcpu_tstats);
+	if (!dev->tstats)
+		return -ENOMEM;
+
+	return 0;
+}
+
+static const struct net_device_ops ip6gre_tap_netdev_ops = {
+	.ndo_init = ip6gre_tap_init,
+	.ndo_uninit = ip6gre_tunnel_uninit,
+	.ndo_start_xmit = ip6gre_tunnel_xmit,
+	.ndo_set_mac_address = eth_mac_addr,
+	.ndo_validate_addr = eth_validate_addr,
+	.ndo_change_mtu = ip6gre_tunnel_change_mtu,
+	.ndo_get_stats64 = ip6gre_get_stats64,
+};
+
+static void ip6gre_tap_setup(struct net_device *dev)
+{
+
+	ether_setup(dev);
+
+	dev->netdev_ops = &ip6gre_tap_netdev_ops;
+	dev->destructor = ip6gre_dev_free;
+
+	dev->iflink = 0;
+	dev->features |= NETIF_F_NETNS_LOCAL;
+}
+
+static int ip6gre_newlink(struct net *src_net, struct net_device *dev,
+	struct nlattr *tb[], struct nlattr *data[])
+{
+	struct ip6_tnl *nt;
+	struct net *net = dev_net(dev);
+	struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
+	int err;
+
+	nt = netdev_priv(dev);
+	ip6gre_netlink_parms(data, &nt->parms);
+
+	if (ip6gre_tunnel_find(net, &nt->parms, dev->type))
+		return -EEXIST;
+
+	if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
+		eth_hw_addr_random(dev);
+
+	nt->dev = dev;
+	ip6gre_tnl_link_config(nt, !tb[IFLA_MTU]);
+
+	/* Can use a lockless transmit, unless we generate output sequences */
+	if (!(nt->parms.o_flags & GRE_SEQ))
+		dev->features |= NETIF_F_LLTX;
+
+	err = register_netdevice(dev);
+	if (err)
+		goto out;
+
+	dev_hold(dev);
+	ip6gre_tunnel_link(ign, nt);
+
+out:
+	return err;
+}
+
+static int ip6gre_changelink(struct net_device *dev, struct nlattr *tb[],
+			    struct nlattr *data[])
+{
+	struct ip6_tnl *t, *nt;
+	struct net *net = dev_net(dev);
+	struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
+	struct __ip6_tnl_parm p;
+
+	if (dev == ign->fb_tunnel_dev)
+		return -EINVAL;
+
+	nt = netdev_priv(dev);
+	ip6gre_netlink_parms(data, &p);
+
+	t = ip6gre_tunnel_locate(net, &p, 0);
+
+	if (t) {
+		if (t->dev != dev)
+			return -EEXIST;
+	} else {
+		t = nt;
+
+		ip6gre_tunnel_unlink(ign, t);
+		ip6gre_tnl_change(t, &p, !tb[IFLA_MTU]);
+		ip6gre_tunnel_link(ign, t);
+		netdev_state_change(dev);
+	}
+
+	return 0;
+}
+
+static size_t ip6gre_get_size(const struct net_device *dev)
+{
+	return
+		/* IFLA_GRE_LINK */
+		nla_total_size(4) +
+		/* IFLA_GRE_IFLAGS */
+		nla_total_size(2) +
+		/* IFLA_GRE_OFLAGS */
+		nla_total_size(2) +
+		/* IFLA_GRE_IKEY */
+		nla_total_size(4) +
+		/* IFLA_GRE_OKEY */
+		nla_total_size(4) +
+		/* IFLA_GRE_LOCAL */
+		nla_total_size(4) +
+		/* IFLA_GRE_REMOTE */
+		nla_total_size(4) +
+		/* IFLA_GRE_TTL */
+		nla_total_size(1) +
+		/* IFLA_GRE_TOS */
+		nla_total_size(1) +
+		/* IFLA_GRE_ENCAP_LIMIT */
+		nla_total_size(1) +
+		/* IFLA_GRE_FLOWINFO */
+		nla_total_size(4) +
+		/* IFLA_GRE_FLAGS */
+		nla_total_size(4) +
+		0;
+}
+
+static int ip6gre_fill_info(struct sk_buff *skb, const struct net_device *dev)
+{
+	struct ip6_tnl *t = netdev_priv(dev);
+	struct __ip6_tnl_parm *p = &t->parms;
+
+	if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
+	    nla_put_be16(skb, IFLA_GRE_IFLAGS, p->i_flags) ||
+	    nla_put_be16(skb, IFLA_GRE_OFLAGS, p->o_flags) ||
+	    nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) ||
+	    nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) ||
+	    nla_put(skb, IFLA_GRE_LOCAL, sizeof(struct in6_addr), &p->raddr) ||
+	    nla_put(skb, IFLA_GRE_REMOTE, sizeof(struct in6_addr), &p->laddr) ||
+	    nla_put_u8(skb, IFLA_GRE_TTL, p->hop_limit) ||
+	    /*nla_put_u8(skb, IFLA_GRE_TOS, t->priority) ||*/
+	    nla_put_u8(skb, IFLA_GRE_ENCAP_LIMIT, p->encap_limit) ||
+	    nla_put_be32(skb, IFLA_GRE_FLOWINFO, p->flowinfo) ||
+	    nla_put_u32(skb, IFLA_GRE_FLAGS, p->flags))
+		goto nla_put_failure;
+	return 0;
+
+nla_put_failure:
+	return -EMSGSIZE;
+}
+
+static const struct nla_policy ip6gre_policy[IFLA_GRE_MAX + 1] = {
+	[IFLA_GRE_LINK]        = { .type = NLA_U32 },
+	[IFLA_GRE_IFLAGS]      = { .type = NLA_U16 },
+	[IFLA_GRE_OFLAGS]      = { .type = NLA_U16 },
+	[IFLA_GRE_IKEY]        = { .type = NLA_U32 },
+	[IFLA_GRE_OKEY]        = { .type = NLA_U32 },
+	[IFLA_GRE_LOCAL]       = { .len = FIELD_SIZEOF(struct ipv6hdr, saddr) },
+	[IFLA_GRE_REMOTE]      = { .len = FIELD_SIZEOF(struct ipv6hdr, daddr) },
+	[IFLA_GRE_TTL]         = { .type = NLA_U8 },
+	[IFLA_GRE_ENCAP_LIMIT] = { .type = NLA_U8 },
+	[IFLA_GRE_FLOWINFO]    = { .type = NLA_U32 },
+	[IFLA_GRE_FLAGS]       = { .type = NLA_U32 },
+};
+
+static struct rtnl_link_ops ip6gre_link_ops __read_mostly = {
+	.kind		= "ip6gre",
+	.maxtype	= IFLA_GRE_MAX,
+	.policy		= ip6gre_policy,
+	.priv_size	= sizeof(struct ip6_tnl),
+	.setup		= ip6gre_tunnel_setup,
+	.validate	= ip6gre_tunnel_validate,
+	.newlink	= ip6gre_newlink,
+	.changelink	= ip6gre_changelink,
+	.get_size	= ip6gre_get_size,
+	.fill_info	= ip6gre_fill_info,
+};
+
+static struct rtnl_link_ops ip6gre_tap_ops __read_mostly = {
+	.kind		= "ip6gretap",
+	.maxtype	= IFLA_GRE_MAX,
+	.policy		= ip6gre_policy,
+	.priv_size	= sizeof(struct ip6_tnl),
+	.setup		= ip6gre_tap_setup,
+	.validate	= ip6gre_tap_validate,
+	.newlink	= ip6gre_newlink,
+	.changelink	= ip6gre_changelink,
+	.get_size	= ip6gre_get_size,
+	.fill_info	= ip6gre_fill_info,
+};
+
+/*
+ *	And now the modules code and kernel interface.
+ */
+
+static int __init ip6gre_init(void)
+{
+	int err;
+
+	pr_info("GRE over IPv6 tunneling driver\n");
+
+	err = register_pernet_device(&ip6gre_net_ops);
+	if (err < 0)
+		return err;
+
+	err = inet6_add_protocol(&ip6gre_protocol, IPPROTO_GRE);
+	if (err < 0) {
+		pr_info("%s: can't add protocol\n", __func__);
+		goto add_proto_failed;
+	}
+
+	err = rtnl_link_register(&ip6gre_link_ops);
+	if (err < 0)
+		goto rtnl_link_failed;
+
+	err = rtnl_link_register(&ip6gre_tap_ops);
+	if (err < 0)
+		goto tap_ops_failed;
+
+out:
+	return err;
+
+tap_ops_failed:
+	rtnl_link_unregister(&ip6gre_link_ops);
+rtnl_link_failed:
+	inet6_del_protocol(&ip6gre_protocol, IPPROTO_GRE);
+add_proto_failed:
+	unregister_pernet_device(&ip6gre_net_ops);
+	goto out;
+}
+
+static void __exit ip6gre_fini(void)
+{
+	rtnl_link_unregister(&ip6gre_tap_ops);
+	rtnl_link_unregister(&ip6gre_link_ops);
+	inet6_del_protocol(&ip6gre_protocol, IPPROTO_GRE);
+	unregister_pernet_device(&ip6gre_net_ops);
+}
+
+module_init(ip6gre_init);
+module_exit(ip6gre_fini);
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("D. Kozlov (xeb@mail.ru)");
+MODULE_DESCRIPTION("GRE over IPv6 tunneling device");
+MODULE_ALIAS_RTNL_LINK("ip6gre");
+MODULE_ALIAS_NETDEV("ip6gre0");
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 9a1d5fe6aef8..33d2a0e6712d 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -126,7 +126,7 @@ static struct net_device_stats *ip6_get_stats(struct net_device *dev)
  * Locking : hash tables are protected by RCU and RTNL
  */
 
-static inline struct dst_entry *ip6_tnl_dst_check(struct ip6_tnl *t)
+struct dst_entry *ip6_tnl_dst_check(struct ip6_tnl *t)
 {
 	struct dst_entry *dst = t->dst_cache;
 
@@ -139,20 +139,23 @@ static inline struct dst_entry *ip6_tnl_dst_check(struct ip6_tnl *t)
 
 	return dst;
 }
+EXPORT_SYMBOL_GPL(ip6_tnl_dst_check);
 
-static inline void ip6_tnl_dst_reset(struct ip6_tnl *t)
+void ip6_tnl_dst_reset(struct ip6_tnl *t)
 {
 	dst_release(t->dst_cache);
 	t->dst_cache = NULL;
 }
+EXPORT_SYMBOL_GPL(ip6_tnl_dst_reset);
 
-static inline void ip6_tnl_dst_store(struct ip6_tnl *t, struct dst_entry *dst)
+void ip6_tnl_dst_store(struct ip6_tnl *t, struct dst_entry *dst)
 {
 	struct rt6_info *rt = (struct rt6_info *) dst;
 	t->dst_cookie = rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0;
 	dst_release(t->dst_cache);
 	t->dst_cache = dst;
 }
+EXPORT_SYMBOL_GPL(ip6_tnl_dst_store);
 
 /**
  * ip6_tnl_lookup - fetch tunnel matching the end-point addresses
@@ -200,7 +203,7 @@ ip6_tnl_lookup(struct net *net, const struct in6_addr *remote, const struct in6_
  **/
 
 static struct ip6_tnl __rcu **
-ip6_tnl_bucket(struct ip6_tnl_net *ip6n, const struct ip6_tnl_parm *p)
+ip6_tnl_bucket(struct ip6_tnl_net *ip6n, const struct __ip6_tnl_parm *p)
 {
 	const struct in6_addr *remote = &p->raddr;
 	const struct in6_addr *local = &p->laddr;
@@ -267,7 +270,7 @@ static void ip6_dev_free(struct net_device *dev)
  *   created tunnel or NULL
  **/
 
-static struct ip6_tnl *ip6_tnl_create(struct net *net, struct ip6_tnl_parm *p)
+static struct ip6_tnl *ip6_tnl_create(struct net *net, struct __ip6_tnl_parm *p)
 {
 	struct net_device *dev;
 	struct ip6_tnl *t;
@@ -322,7 +325,7 @@ failed:
  **/
 
 static struct ip6_tnl *ip6_tnl_locate(struct net *net,
-		struct ip6_tnl_parm *p, int create)
+		struct __ip6_tnl_parm *p, int create)
 {
 	const struct in6_addr *remote = &p->raddr;
 	const struct in6_addr *local = &p->laddr;
@@ -374,8 +377,7 @@ ip6_tnl_dev_uninit(struct net_device *dev)
  *   else index to encapsulation limit
  **/
 
-static __u16
-parse_tlv_tnl_enc_lim(struct sk_buff *skb, __u8 * raw)
+__u16 ip6_tnl_parse_tlv_enc_lim(struct sk_buff *skb, __u8 *raw)
 {
 	const struct ipv6hdr *ipv6h = (const struct ipv6hdr *) raw;
 	__u8 nexthdr = ipv6h->nexthdr;
@@ -425,6 +427,7 @@ parse_tlv_tnl_enc_lim(struct sk_buff *skb, __u8 * raw)
 	}
 	return 0;
 }
+EXPORT_SYMBOL(ip6_tnl_parse_tlv_enc_lim);
 
 /**
  * ip6_tnl_err - tunnel error handler
@@ -480,7 +483,7 @@ ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt,
 	case ICMPV6_PARAMPROB:
 		teli = 0;
 		if ((*code) == ICMPV6_HDR_FIELD)
-			teli = parse_tlv_tnl_enc_lim(skb, skb->data);
+			teli = ip6_tnl_parse_tlv_enc_lim(skb, skb->data);
 
 		if (teli && teli == *info - 2) {
 			tel = (struct ipv6_tlv_tnl_enc_lim *) &skb->data[teli];
@@ -693,11 +696,11 @@ static void ip6ip6_dscp_ecn_decapsulate(const struct ip6_tnl *t,
 		IP6_ECN_set_ce(ipv6_hdr(skb));
 }
 
-static __u32 ip6_tnl_get_cap(struct ip6_tnl *t,
+__u32 ip6_tnl_get_cap(struct ip6_tnl *t,
 			     const struct in6_addr *laddr,
 			     const struct in6_addr *raddr)
 {
-	struct ip6_tnl_parm *p = &t->parms;
+	struct __ip6_tnl_parm *p = &t->parms;
 	int ltype = ipv6_addr_type(laddr);
 	int rtype = ipv6_addr_type(raddr);
 	__u32 flags = 0;
@@ -715,13 +718,14 @@ static __u32 ip6_tnl_get_cap(struct ip6_tnl *t,
 	}
 	return flags;
 }
+EXPORT_SYMBOL(ip6_tnl_get_cap);
 
 /* called with rcu_read_lock() */
-static inline int ip6_tnl_rcv_ctl(struct ip6_tnl *t,
+int ip6_tnl_rcv_ctl(struct ip6_tnl *t,
 				  const struct in6_addr *laddr,
 				  const struct in6_addr *raddr)
 {
-	struct ip6_tnl_parm *p = &t->parms;
+	struct __ip6_tnl_parm *p = &t->parms;
 	int ret = 0;
 	struct net *net = dev_net(t->dev);
 
@@ -740,6 +744,7 @@ static inline int ip6_tnl_rcv_ctl(struct ip6_tnl *t,
 	}
 	return ret;
 }
+EXPORT_SYMBOL_GPL(ip6_tnl_rcv_ctl);
 
 /**
  * ip6_tnl_rcv - decapsulate IPv6 packet and retransmit it locally
@@ -859,9 +864,9 @@ ip6_tnl_addr_conflict(const struct ip6_tnl *t, const struct ipv6hdr *hdr)
 	return ipv6_addr_equal(&t->parms.raddr, &hdr->saddr);
 }
 
-static inline int ip6_tnl_xmit_ctl(struct ip6_tnl *t)
+int ip6_tnl_xmit_ctl(struct ip6_tnl *t)
 {
-	struct ip6_tnl_parm *p = &t->parms;
+	struct __ip6_tnl_parm *p = &t->parms;
 	int ret = 0;
 	struct net *net = dev_net(t->dev);
 
@@ -885,6 +890,8 @@ static inline int ip6_tnl_xmit_ctl(struct ip6_tnl *t)
 	}
 	return ret;
 }
+EXPORT_SYMBOL_GPL(ip6_tnl_xmit_ctl);
+
 /**
  * ip6_tnl_xmit2 - encapsulate packet and send
  *   @skb: the outgoing socket buffer
@@ -1085,7 +1092,7 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
 	    !ip6_tnl_xmit_ctl(t) || ip6_tnl_addr_conflict(t, ipv6h))
 		return -1;
 
-	offset = parse_tlv_tnl_enc_lim(skb, skb_network_header(skb));
+	offset = ip6_tnl_parse_tlv_enc_lim(skb, skb_network_header(skb));
 	if (offset > 0) {
 		struct ipv6_tlv_tnl_enc_lim *tel;
 		tel = (struct ipv6_tlv_tnl_enc_lim *)&skb_network_header(skb)[offset];
@@ -1152,7 +1159,7 @@ tx_err:
 static void ip6_tnl_link_config(struct ip6_tnl *t)
 {
 	struct net_device *dev = t->dev;
-	struct ip6_tnl_parm *p = &t->parms;
+	struct __ip6_tnl_parm *p = &t->parms;
 	struct flowi6 *fl6 = &t->fl.u.ip6;
 
 	memcpy(dev->dev_addr, &p->laddr, sizeof(struct in6_addr));
@@ -1215,7 +1222,7 @@ static void ip6_tnl_link_config(struct ip6_tnl *t)
  **/
 
 static int
-ip6_tnl_change(struct ip6_tnl *t, struct ip6_tnl_parm *p)
+ip6_tnl_change(struct ip6_tnl *t, const struct __ip6_tnl_parm *p)
 {
 	t->parms.laddr = p->laddr;
 	t->parms.raddr = p->raddr;
@@ -1230,6 +1237,34 @@ ip6_tnl_change(struct ip6_tnl *t, struct ip6_tnl_parm *p)
 	return 0;
 }
 
+static void
+ip6_tnl_parm_from_user(struct __ip6_tnl_parm *p, const struct ip6_tnl_parm *u)
+{
+	p->laddr = u->laddr;
+	p->raddr = u->raddr;
+	p->flags = u->flags;
+	p->hop_limit = u->hop_limit;
+	p->encap_limit = u->encap_limit;
+	p->flowinfo = u->flowinfo;
+	p->link = u->link;
+	p->proto = u->proto;
+	memcpy(p->name, u->name, sizeof(u->name));
+}
+
+static void
+ip6_tnl_parm_to_user(struct ip6_tnl_parm *u, const struct __ip6_tnl_parm *p)
+{
+	u->laddr = p->laddr;
+	u->raddr = p->raddr;
+	u->flags = p->flags;
+	u->hop_limit = p->hop_limit;
+	u->encap_limit = p->encap_limit;
+	u->flowinfo = p->flowinfo;
+	u->link = p->link;
+	u->proto = p->proto;
+	memcpy(u->name, p->name, sizeof(u->name));
+}
+
 /**
  * ip6_tnl_ioctl - configure ipv6 tunnels from userspace
  *   @dev: virtual device associated with tunnel
@@ -1263,6 +1298,7 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 {
 	int err = 0;
 	struct ip6_tnl_parm p;
+	struct __ip6_tnl_parm p1;
 	struct ip6_tnl *t = NULL;
 	struct net *net = dev_net(dev);
 	struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
@@ -1274,11 +1310,12 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 				err = -EFAULT;
 				break;
 			}
-			t = ip6_tnl_locate(net, &p, 0);
+			ip6_tnl_parm_from_user(&p1, &p);
+			t = ip6_tnl_locate(net, &p1, 0);
 		}
 		if (t == NULL)
 			t = netdev_priv(dev);
-		memcpy(&p, &t->parms, sizeof (p));
+		ip6_tnl_parm_to_user(&p, &t->parms);
 		if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof (p))) {
 			err = -EFAULT;
 		}
@@ -1295,7 +1332,8 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 		if (p.proto != IPPROTO_IPV6 && p.proto != IPPROTO_IPIP &&
 		    p.proto != 0)
 			break;
-		t = ip6_tnl_locate(net, &p, cmd == SIOCADDTUNNEL);
+		ip6_tnl_parm_from_user(&p1, &p);
+		t = ip6_tnl_locate(net, &p1, cmd == SIOCADDTUNNEL);
 		if (dev != ip6n->fb_tnl_dev && cmd == SIOCCHGTUNNEL) {
 			if (t != NULL) {
 				if (t->dev != dev) {
@@ -1307,13 +1345,14 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 
 			ip6_tnl_unlink(ip6n, t);
 			synchronize_net();
-			err = ip6_tnl_change(t, &p);
+			err = ip6_tnl_change(t, &p1);
 			ip6_tnl_link(ip6n, t);
 			netdev_state_change(dev);
 		}
 		if (t) {
 			err = 0;
-			if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof (p)))
+			ip6_tnl_parm_to_user(&p, &t->parms);
+			if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
 				err = -EFAULT;
 
 		} else
@@ -1329,7 +1368,9 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof (p)))
 				break;
 			err = -ENOENT;
-			if ((t = ip6_tnl_locate(net, &p, 0)) == NULL)
+			ip6_tnl_parm_from_user(&p1, &p);
+			t = ip6_tnl_locate(net, &p1, 0);
+			if (t == NULL)
 				break;
 			err = -EPERM;
 			if (t->dev == ip6n->fb_tnl_dev)
-- 
cgit v1.2.3


From 96ec6327144e1ac9e6676e34fae8b49c2102fa5a Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@parallels.com>
Date: Mon, 13 Aug 2012 05:53:28 +0000
Subject: packet: Diag core and basic socket info dumping

The diag module can be built independently from the af_packet.ko one,
just like it's done in unix sockets.

The core dumping message carries the info available at socket creation
time, i.e. family, type and protocol (in the same byte order as shown in
the proc file).

The socket inode number and cookie is reserved for future per-socket info
retrieving. The per-protocol filtering is also reserved for future by
requiring the sdiag_protocol to be zero.

Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/Kbuild        |   1 +
 include/linux/packet_diag.h |  24 ++++++++++
 net/packet/Kconfig          |   8 ++++
 net/packet/Makefile         |   2 +
 net/packet/diag.c           | 104 ++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 139 insertions(+)
 create mode 100644 include/linux/packet_diag.h
 create mode 100644 net/packet/diag.c

(limited to 'include')

diff --git a/include/linux/Kbuild b/include/linux/Kbuild
index d9a754474878..d823d603dadd 100644
--- a/include/linux/Kbuild
+++ b/include/linux/Kbuild
@@ -195,6 +195,7 @@ header-y += in_route.h
 header-y += sock_diag.h
 header-y += inet_diag.h
 header-y += unix_diag.h
+header-y += packet_diag.h
 header-y += inotify.h
 header-y += input.h
 header-y += ioctl.h
diff --git a/include/linux/packet_diag.h b/include/linux/packet_diag.h
new file mode 100644
index 000000000000..fa19300ca334
--- /dev/null
+++ b/include/linux/packet_diag.h
@@ -0,0 +1,24 @@
+#ifndef __PACKET_DIAG_H__
+#define __PACKET_DIAG_H__
+
+#include <linux/types.h>
+
+struct packet_diag_req {
+	__u8	sdiag_family;
+	__u8	sdiag_protocol;
+	__u16	pad;
+	__u32	pdiag_ino;
+	__u32	pdiag_show;
+	__u32	pdiag_cookie[2];
+};
+
+struct packet_diag_msg {
+	__u8	pdiag_family;
+	__u8	pdiag_type;
+	__u16	pdiag_num;
+
+	__u32	pdiag_ino;
+	__u32	pdiag_cookie[2];
+};
+
+#endif
diff --git a/net/packet/Kconfig b/net/packet/Kconfig
index 0060e3b396b7..cc55b35f80e5 100644
--- a/net/packet/Kconfig
+++ b/net/packet/Kconfig
@@ -14,3 +14,11 @@ config PACKET
 	  be called af_packet.
 
 	  If unsure, say Y.
+
+config PACKET_DIAG
+	tristate "Packet: sockets monitoring interface"
+	depends on PACKET
+	default n
+	---help---
+	  Support for PF_PACKET sockets monitoring interface used by the ss tool.
+	  If unsure, say Y.
diff --git a/net/packet/Makefile b/net/packet/Makefile
index 81183eabfdec..9df61347a3c3 100644
--- a/net/packet/Makefile
+++ b/net/packet/Makefile
@@ -3,3 +3,5 @@
 #
 
 obj-$(CONFIG_PACKET) += af_packet.o
+obj-$(CONFIG_PACKET_DIAG) += af_packet_diag.o
+af_packet_diag-y += diag.o
diff --git a/net/packet/diag.c b/net/packet/diag.c
new file mode 100644
index 000000000000..ff2f7f5bfb8f
--- /dev/null
+++ b/net/packet/diag.c
@@ -0,0 +1,104 @@
+#include <linux/module.h>
+#include <linux/sock_diag.h>
+#include <linux/net.h>
+#include <linux/packet_diag.h>
+#include <net/net_namespace.h>
+#include <net/sock.h>
+
+#include "internal.h"
+
+static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct packet_diag_req *req,
+		u32 pid, u32 seq, u32 flags, int sk_ino)
+{
+	struct nlmsghdr *nlh;
+	struct packet_diag_msg *rp;
+	const struct packet_sock *po = pkt_sk(sk);
+
+	nlh = nlmsg_put(skb, pid, seq, SOCK_DIAG_BY_FAMILY, sizeof(*rp), flags);
+	if (!nlh)
+		return -EMSGSIZE;
+
+	rp = nlmsg_data(nlh);
+	rp->pdiag_family = AF_PACKET;
+	rp->pdiag_type = sk->sk_type;
+	rp->pdiag_num = ntohs(po->num);
+	rp->pdiag_ino = sk_ino;
+	sock_diag_save_cookie(sk, rp->pdiag_cookie);
+
+	return nlmsg_end(skb, nlh);
+}
+
+static int packet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	int num = 0, s_num = cb->args[0];
+	struct packet_diag_req *req;
+	struct net *net;
+	struct sock *sk;
+	struct hlist_node *node;
+
+	net = sock_net(skb->sk);
+	req = nlmsg_data(cb->nlh);
+
+	rcu_read_lock();
+	sk_for_each_rcu(sk, node, &net->packet.sklist) {
+		if (!net_eq(sock_net(sk), net))
+			continue;
+		if (num < s_num)
+			goto next;
+
+		if (sk_diag_fill(sk, skb, req, NETLINK_CB(cb->skb).pid,
+					cb->nlh->nlmsg_seq, NLM_F_MULTI,
+					sock_i_ino(sk)) < 0)
+			goto done;
+next:
+		num++;
+	}
+done:
+	rcu_read_unlock();
+	cb->args[0] = num;
+
+	return skb->len;
+}
+
+static int packet_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h)
+{
+	int hdrlen = sizeof(struct packet_diag_req);
+	struct net *net = sock_net(skb->sk);
+	struct packet_diag_req *req;
+
+	if (nlmsg_len(h) < hdrlen)
+		return -EINVAL;
+
+	req = nlmsg_data(h);
+	/* Make it possible to support protocol filtering later */
+	if (req->sdiag_protocol)
+		return -EINVAL;
+
+	if (h->nlmsg_flags & NLM_F_DUMP) {
+		struct netlink_dump_control c = {
+			.dump = packet_diag_dump,
+		};
+		return netlink_dump_start(net->diag_nlsk, skb, h, &c);
+	} else
+		return -EOPNOTSUPP;
+}
+
+static const struct sock_diag_handler packet_diag_handler = {
+	.family = AF_PACKET,
+	.dump = packet_diag_handler_dump,
+};
+
+static int __init packet_diag_init(void)
+{
+	return sock_diag_register(&packet_diag_handler);
+}
+
+static void __exit packet_diag_exit(void)
+{
+	sock_diag_unregister(&packet_diag_handler);
+}
+
+module_init(packet_diag_init);
+module_exit(packet_diag_exit);
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 17 /* AF_PACKET */);
-- 
cgit v1.2.3


From 8a360be0c5f8fe2c46f0a524886fa56596534193 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@parallels.com>
Date: Mon, 13 Aug 2012 05:55:46 +0000
Subject: packet: Report more packet sk info via diag module

This reports in one rtattr message all the other scalar values, that can be
set on a packet socket with setsockopt.

Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/packet_diag.h | 23 +++++++++++++++++++++++
 net/packet/diag.c           | 33 +++++++++++++++++++++++++++++++++
 2 files changed, 56 insertions(+)

(limited to 'include')

diff --git a/include/linux/packet_diag.h b/include/linux/packet_diag.h
index fa19300ca334..3781ea4bc829 100644
--- a/include/linux/packet_diag.h
+++ b/include/linux/packet_diag.h
@@ -12,6 +12,8 @@ struct packet_diag_req {
 	__u32	pdiag_cookie[2];
 };
 
+#define PACKET_SHOW_INFO	0x00000001 /* Basic packet_sk information */
+
 struct packet_diag_msg {
 	__u8	pdiag_family;
 	__u8	pdiag_type;
@@ -21,4 +23,25 @@ struct packet_diag_msg {
 	__u32	pdiag_cookie[2];
 };
 
+enum {
+	PACKET_DIAG_INFO,
+
+	PACKET_DIAG_MAX,
+};
+
+struct packet_diag_info {
+	__u32	pdi_index;
+	__u32	pdi_version;
+	__u32	pdi_reserve;
+	__u32	pdi_copy_thresh;
+	__u32	pdi_tstamp;
+	__u32	pdi_flags;
+
+#define PDI_RUNNING	0x1
+#define PDI_AUXDATA	0x2
+#define PDI_ORIGDEV	0x4
+#define PDI_VNETHDR	0x8
+#define PDI_LOSS	0x10
+};
+
 #endif
diff --git a/net/packet/diag.c b/net/packet/diag.c
index ff2f7f5bfb8f..d5bd0372d004 100644
--- a/net/packet/diag.c
+++ b/net/packet/diag.c
@@ -7,6 +7,31 @@
 
 #include "internal.h"
 
+static int pdiag_put_info(const struct packet_sock *po, struct sk_buff *nlskb)
+{
+	struct packet_diag_info pinfo;
+
+	pinfo.pdi_index = po->ifindex;
+	pinfo.pdi_version = po->tp_version;
+	pinfo.pdi_reserve = po->tp_reserve;
+	pinfo.pdi_copy_thresh = po->copy_thresh;
+	pinfo.pdi_tstamp = po->tp_tstamp;
+
+	pinfo.pdi_flags = 0;
+	if (po->running)
+		pinfo.pdi_flags |= PDI_RUNNING;
+	if (po->auxdata)
+		pinfo.pdi_flags |= PDI_AUXDATA;
+	if (po->origdev)
+		pinfo.pdi_flags |= PDI_ORIGDEV;
+	if (po->has_vnet_hdr)
+		pinfo.pdi_flags |= PDI_VNETHDR;
+	if (po->tp_loss)
+		pinfo.pdi_flags |= PDI_LOSS;
+
+	return nla_put(nlskb, PACKET_DIAG_INFO, sizeof(pinfo), &pinfo);
+}
+
 static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct packet_diag_req *req,
 		u32 pid, u32 seq, u32 flags, int sk_ino)
 {
@@ -25,7 +50,15 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct packet_diag
 	rp->pdiag_ino = sk_ino;
 	sock_diag_save_cookie(sk, rp->pdiag_cookie);
 
+	if ((req->pdiag_show & PACKET_SHOW_INFO) &&
+			pdiag_put_info(po, skb))
+		goto out_nlmsg_trim;
+
 	return nlmsg_end(skb, nlh);
+
+out_nlmsg_trim:
+	nlmsg_cancel(skb, nlh);
+	return -EMSGSIZE;
 }
 
 static int packet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
-- 
cgit v1.2.3


From eea68e2f1a0061e09265992b91fdc0014930ae92 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@parallels.com>
Date: Mon, 13 Aug 2012 05:57:44 +0000
Subject: packet: Report socket mclist info via diag module

The info is reported as an array of packet_diag_mclist structures. Each
includes not only the directly configured values (index, type, etc), but
also the "count".

Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/packet_diag.h | 10 ++++++++++
 net/packet/diag.c           | 39 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 49 insertions(+)

(limited to 'include')

diff --git a/include/linux/packet_diag.h b/include/linux/packet_diag.h
index 3781ea4bc829..ea2e8923bd7e 100644
--- a/include/linux/packet_diag.h
+++ b/include/linux/packet_diag.h
@@ -13,6 +13,7 @@ struct packet_diag_req {
 };
 
 #define PACKET_SHOW_INFO	0x00000001 /* Basic packet_sk information */
+#define PACKET_SHOW_MCLIST	0x00000002 /* A set of packet_diag_mclist-s */
 
 struct packet_diag_msg {
 	__u8	pdiag_family;
@@ -25,6 +26,7 @@ struct packet_diag_msg {
 
 enum {
 	PACKET_DIAG_INFO,
+	PACKET_DIAG_MCLIST,
 
 	PACKET_DIAG_MAX,
 };
@@ -44,4 +46,12 @@ struct packet_diag_info {
 #define PDI_LOSS	0x10
 };
 
+struct packet_diag_mclist {
+	__u32	pdmc_index;
+	__u32	pdmc_count;
+	__u16	pdmc_type;
+	__u16	pdmc_alen;
+	__u8	pdmc_addr[MAX_ADDR_LEN];
+};
+
 #endif
diff --git a/net/packet/diag.c b/net/packet/diag.c
index d5bd0372d004..3dda4ecb9a46 100644
--- a/net/packet/diag.c
+++ b/net/packet/diag.c
@@ -1,6 +1,7 @@
 #include <linux/module.h>
 #include <linux/sock_diag.h>
 #include <linux/net.h>
+#include <linux/netdevice.h>
 #include <linux/packet_diag.h>
 #include <net/net_namespace.h>
 #include <net/sock.h>
@@ -32,6 +33,40 @@ static int pdiag_put_info(const struct packet_sock *po, struct sk_buff *nlskb)
 	return nla_put(nlskb, PACKET_DIAG_INFO, sizeof(pinfo), &pinfo);
 }
 
+static int pdiag_put_mclist(const struct packet_sock *po, struct sk_buff *nlskb)
+{
+	struct nlattr *mca;
+	struct packet_mclist *ml;
+
+	mca = nla_nest_start(nlskb, PACKET_DIAG_MCLIST);
+	if (!mca)
+		return -EMSGSIZE;
+
+	rtnl_lock();
+	for (ml = po->mclist; ml; ml = ml->next) {
+		struct packet_diag_mclist *dml;
+
+		dml = nla_reserve_nohdr(nlskb, sizeof(*dml));
+		if (!dml) {
+			rtnl_unlock();
+			nla_nest_cancel(nlskb, mca);
+			return -EMSGSIZE;
+		}
+
+		dml->pdmc_index = ml->ifindex;
+		dml->pdmc_type = ml->type;
+		dml->pdmc_alen = ml->alen;
+		dml->pdmc_count = ml->count;
+		BUILD_BUG_ON(sizeof(dml->pdmc_addr) != sizeof(ml->addr));
+		memcpy(dml->pdmc_addr, ml->addr, sizeof(ml->addr));
+	}
+
+	rtnl_unlock();
+	nla_nest_end(nlskb, mca);
+
+	return 0;
+}
+
 static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct packet_diag_req *req,
 		u32 pid, u32 seq, u32 flags, int sk_ino)
 {
@@ -54,6 +89,10 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct packet_diag
 			pdiag_put_info(po, skb))
 		goto out_nlmsg_trim;
 
+	if ((req->pdiag_show & PACKET_SHOW_MCLIST) &&
+			pdiag_put_mclist(po, skb))
+		goto out_nlmsg_trim;
+
 	return nlmsg_end(skb, nlh);
 
 out_nlmsg_trim:
-- 
cgit v1.2.3


From 57f5d0d1d9f8e59819cb0ab4b707364c54b5b2d1 Mon Sep 17 00:00:00 2001
From: Vinicius Costa Gomes <vinicius.gomes@openbossa.org>
Date: Fri, 27 Jul 2012 19:32:54 -0300
Subject: Bluetooth: Remove some functions from being exported

Some connection related functions are only used inside hci_conn.c
so no need to have them exported.

Signed-off-by: Vinicius Costa Gomes <vinicius.gomes@openbossa.org>
Signed-off-by: Gustavo Padovan <gustavo.padovan@collabora.co.uk>
---
 include/net/bluetooth/hci_core.h | 2 --
 net/bluetooth/hci_conn.c         | 4 ++--
 2 files changed, 2 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 41d943926d2c..7267dafd7159 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -551,9 +551,7 @@ static inline struct hci_conn *hci_conn_hash_lookup_state(struct hci_dev *hdev,
 	return NULL;
 }
 
-void hci_acl_connect(struct hci_conn *conn);
 void hci_acl_disconn(struct hci_conn *conn, __u8 reason);
-void hci_add_sco(struct hci_conn *conn, __u16 handle);
 void hci_setup_sync(struct hci_conn *conn, __u16 handle);
 void hci_sco_setup(struct hci_conn *conn, __u8 status);
 
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index 5ad7da217474..724eea980812 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -59,7 +59,7 @@ static void hci_le_connect_cancel(struct hci_conn *conn)
 	hci_send_cmd(conn->hdev, HCI_OP_LE_CREATE_CONN_CANCEL, 0, NULL);
 }
 
-void hci_acl_connect(struct hci_conn *conn)
+static void hci_acl_connect(struct hci_conn *conn)
 {
 	struct hci_dev *hdev = conn->hdev;
 	struct inquiry_entry *ie;
@@ -129,7 +129,7 @@ void hci_acl_disconn(struct hci_conn *conn, __u8 reason)
 	hci_send_cmd(conn->hdev, HCI_OP_DISCONNECT, sizeof(cp), &cp);
 }
 
-void hci_add_sco(struct hci_conn *conn, __u16 handle)
+static void hci_add_sco(struct hci_conn *conn, __u16 handle)
 {
 	struct hci_dev *hdev = conn->hdev;
 	struct hci_cp_add_sco cp;
-- 
cgit v1.2.3


From f1f4376307ca45558eb22487022aefceed7385e8 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Mon, 6 Aug 2012 08:39:38 +0000
Subject: sctp: Make the port hash table use struct net in it's key.

- Add struct net into the port hash table hash calculation
- Add struct net inot the struct sctp_bind_bucket so there
  is a memory of which network namespace a port is allocated in.
  No need for a ref count because sctp_bind_bucket only exists
  when there are sockets in the hash table and sockets can not
  change their network namspace, and sockets already ref count
  their network namespace.
- Add struct net into the key comparison when we are testing
  to see if we have found the port hash table entry we are
  looking for.

With these changes lookups in the port hash table becomes
safe to use in multiple network namespaces.

Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
Acked-by: Vlad Yasevich <vyasevich@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/sctp.h    |  4 ++--
 include/net/sctp/structs.h |  1 +
 net/sctp/socket.c          | 22 +++++++++++++---------
 3 files changed, 16 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
index ff499640528b..7c0504034583 100644
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@ -632,9 +632,9 @@ static inline int sctp_sanity_check(void)
 
 /* Warning: The following hash functions assume a power of two 'size'. */
 /* This is the hash function for the SCTP port hash table. */
-static inline int sctp_phashfn(__u16 lport)
+static inline int sctp_phashfn(struct net *net, __u16 lport)
 {
-	return lport & (sctp_port_hashsize - 1);
+	return (net_hash_mix(net) + lport) & (sctp_port_hashsize - 1);
 }
 
 /* This is the hash function for the endpoint hash table. */
diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index fc5e60016e37..c089bb12af77 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -102,6 +102,7 @@ struct sctp_bind_bucket {
 	unsigned short	fastreuse;
 	struct hlist_node	node;
 	struct hlist_head	owner;
+	struct net	*net;
 };
 
 struct sctp_bind_hashbucket {
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 5e259817a7f3..4316b0f988d4 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -5769,7 +5769,7 @@ static void sctp_unhash(struct sock *sk)
  * a fastreuse flag (FIXME: NPI ipg).
  */
 static struct sctp_bind_bucket *sctp_bucket_create(
-	struct sctp_bind_hashbucket *head, unsigned short snum);
+	struct sctp_bind_hashbucket *head, struct net *, unsigned short snum);
 
 static long sctp_get_port_local(struct sock *sk, union sctp_addr *addr)
 {
@@ -5799,11 +5799,12 @@ static long sctp_get_port_local(struct sock *sk, union sctp_addr *addr)
 				rover = low;
 			if (inet_is_reserved_local_port(rover))
 				continue;
-			index = sctp_phashfn(rover);
+			index = sctp_phashfn(sock_net(sk), rover);
 			head = &sctp_port_hashtable[index];
 			sctp_spin_lock(&head->lock);
 			sctp_for_each_hentry(pp, node, &head->chain)
-				if (pp->port == rover)
+				if ((pp->port == rover) &&
+				    net_eq(sock_net(sk), pp->net))
 					goto next;
 			break;
 		next:
@@ -5827,10 +5828,10 @@ static long sctp_get_port_local(struct sock *sk, union sctp_addr *addr)
 		 * to the port number (snum) - we detect that with the
 		 * port iterator, pp being NULL.
 		 */
-		head = &sctp_port_hashtable[sctp_phashfn(snum)];
+		head = &sctp_port_hashtable[sctp_phashfn(sock_net(sk), snum)];
 		sctp_spin_lock(&head->lock);
 		sctp_for_each_hentry(pp, node, &head->chain) {
-			if (pp->port == snum)
+			if ((pp->port == snum) && net_eq(pp->net, sock_net(sk)))
 				goto pp_found;
 		}
 	}
@@ -5881,7 +5882,7 @@ pp_found:
 pp_not_found:
 	/* If there was a hash table miss, create a new port.  */
 	ret = 1;
-	if (!pp && !(pp = sctp_bucket_create(head, snum)))
+	if (!pp && !(pp = sctp_bucket_create(head, sock_net(sk), snum)))
 		goto fail_unlock;
 
 	/* In either case (hit or miss), make sure fastreuse is 1 only
@@ -6113,7 +6114,7 @@ unsigned int sctp_poll(struct file *file, struct socket *sock, poll_table *wait)
  ********************************************************************/
 
 static struct sctp_bind_bucket *sctp_bucket_create(
-	struct sctp_bind_hashbucket *head, unsigned short snum)
+	struct sctp_bind_hashbucket *head, struct net *net, unsigned short snum)
 {
 	struct sctp_bind_bucket *pp;
 
@@ -6123,6 +6124,7 @@ static struct sctp_bind_bucket *sctp_bucket_create(
 		pp->port = snum;
 		pp->fastreuse = 0;
 		INIT_HLIST_HEAD(&pp->owner);
+		pp->net = net;
 		hlist_add_head(&pp->node, &head->chain);
 	}
 	return pp;
@@ -6142,7 +6144,8 @@ static void sctp_bucket_destroy(struct sctp_bind_bucket *pp)
 static inline void __sctp_put_port(struct sock *sk)
 {
 	struct sctp_bind_hashbucket *head =
-		&sctp_port_hashtable[sctp_phashfn(inet_sk(sk)->inet_num)];
+		&sctp_port_hashtable[sctp_phashfn(sock_net(sk),
+						  inet_sk(sk)->inet_num)];
 	struct sctp_bind_bucket *pp;
 
 	sctp_spin_lock(&head->lock);
@@ -6809,7 +6812,8 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk,
 	newsp->hmac = NULL;
 
 	/* Hook this new socket in to the bind_hash list. */
-	head = &sctp_port_hashtable[sctp_phashfn(inet_sk(oldsk)->inet_num)];
+	head = &sctp_port_hashtable[sctp_phashfn(sock_net(oldsk),
+						 inet_sk(oldsk)->inet_num)];
 	sctp_local_bh_disable();
 	sctp_spin_lock(&head->lock);
 	pp = sctp_sk(oldsk)->bind_hash;
-- 
cgit v1.2.3


From 4cdadcbcb64bdf3ae8bdf3ef5bb2b91c85444cfa Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Mon, 6 Aug 2012 08:40:21 +0000
Subject: sctp: Make the endpoint hashtable handle multiple network namespaces

- Use struct net in the hash calculation
- Use sock_net(endpoint.base.sk) in the endpoint lookups.
- On receive calculate the network namespace from skb->dev.

Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
Acked-by: Vlad Yasevich <vyasevich@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/sctp.h    |  4 ++--
 include/net/sctp/structs.h |  2 +-
 net/sctp/endpointola.c     |  4 +++-
 net/sctp/input.c           | 19 ++++++++++++-------
 4 files changed, 18 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
index 7c0504034583..87b119f74c4a 100644
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@ -638,9 +638,9 @@ static inline int sctp_phashfn(struct net *net, __u16 lport)
 }
 
 /* This is the hash function for the endpoint hash table. */
-static inline int sctp_ep_hashfn(__u16 lport)
+static inline int sctp_ep_hashfn(struct net *net, __u16 lport)
 {
-	return lport & (sctp_ep_hashsize - 1);
+	return (net_hash_mix(net) + lport) & (sctp_ep_hashsize - 1);
 }
 
 /* This is the hash function for the association hash table. */
diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index c089bb12af77..9f9de558541f 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -1426,7 +1426,7 @@ struct sctp_association *sctp_endpoint_lookup_assoc(
 int sctp_endpoint_is_peeled_off(struct sctp_endpoint *,
 				const union sctp_addr *);
 struct sctp_endpoint *sctp_endpoint_is_match(struct sctp_endpoint *,
-					const union sctp_addr *);
+					struct net *, const union sctp_addr *);
 int sctp_has_association(const union sctp_addr *laddr,
 			 const union sctp_addr *paddr);
 
diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c
index 68a385d7c3bd..50c87b4ad765 100644
--- a/net/sctp/endpointola.c
+++ b/net/sctp/endpointola.c
@@ -302,11 +302,13 @@ void sctp_endpoint_put(struct sctp_endpoint *ep)
 
 /* Is this the endpoint we are looking for?  */
 struct sctp_endpoint *sctp_endpoint_is_match(struct sctp_endpoint *ep,
+					       struct net *net,
 					       const union sctp_addr *laddr)
 {
 	struct sctp_endpoint *retval = NULL;
 
-	if (htons(ep->base.bind_addr.port) == laddr->v4.sin_port) {
+	if ((htons(ep->base.bind_addr.port) == laddr->v4.sin_port) &&
+	    net_eq(sock_net(ep->base.sk), net)) {
 		if (sctp_bind_addr_match(&ep->base.bind_addr, laddr,
 					 sctp_sk(ep->base.sk)))
 			retval = ep;
diff --git a/net/sctp/input.c b/net/sctp/input.c
index e64d5210ed13..c0ca893ab1d1 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -70,7 +70,8 @@ static struct sctp_association *__sctp_rcv_lookup(struct sk_buff *skb,
 				      const union sctp_addr *laddr,
 				      const union sctp_addr *paddr,
 				      struct sctp_transport **transportp);
-static struct sctp_endpoint *__sctp_rcv_lookup_endpoint(const union sctp_addr *laddr);
+static struct sctp_endpoint *__sctp_rcv_lookup_endpoint(struct net *net,
+						const union sctp_addr *laddr);
 static struct sctp_association *__sctp_lookup_association(
 					const union sctp_addr *local,
 					const union sctp_addr *peer,
@@ -129,6 +130,7 @@ int sctp_rcv(struct sk_buff *skb)
 	union sctp_addr dest;
 	int family;
 	struct sctp_af *af;
+	struct net *net = dev_net(skb->dev);
 
 	if (skb->pkt_type!=PACKET_HOST)
 		goto discard_it;
@@ -181,7 +183,7 @@ int sctp_rcv(struct sk_buff *skb)
 	asoc = __sctp_rcv_lookup(skb, &src, &dest, &transport);
 
 	if (!asoc)
-		ep = __sctp_rcv_lookup_endpoint(&dest);
+		ep = __sctp_rcv_lookup_endpoint(net, &dest);
 
 	/* Retrieve the common input handling substructure. */
 	rcvr = asoc ? &asoc->base : &ep->base;
@@ -723,12 +725,13 @@ discard:
 /* Insert endpoint into the hash table.  */
 static void __sctp_hash_endpoint(struct sctp_endpoint *ep)
 {
+	struct net *net = sock_net(ep->base.sk);
 	struct sctp_ep_common *epb;
 	struct sctp_hashbucket *head;
 
 	epb = &ep->base;
 
-	epb->hashent = sctp_ep_hashfn(epb->bind_addr.port);
+	epb->hashent = sctp_ep_hashfn(net, epb->bind_addr.port);
 	head = &sctp_ep_hashtable[epb->hashent];
 
 	sctp_write_lock(&head->lock);
@@ -747,12 +750,13 @@ void sctp_hash_endpoint(struct sctp_endpoint *ep)
 /* Remove endpoint from the hash table.  */
 static void __sctp_unhash_endpoint(struct sctp_endpoint *ep)
 {
+	struct net *net = sock_net(ep->base.sk);
 	struct sctp_hashbucket *head;
 	struct sctp_ep_common *epb;
 
 	epb = &ep->base;
 
-	epb->hashent = sctp_ep_hashfn(epb->bind_addr.port);
+	epb->hashent = sctp_ep_hashfn(net, epb->bind_addr.port);
 
 	head = &sctp_ep_hashtable[epb->hashent];
 
@@ -770,7 +774,8 @@ void sctp_unhash_endpoint(struct sctp_endpoint *ep)
 }
 
 /* Look up an endpoint. */
-static struct sctp_endpoint *__sctp_rcv_lookup_endpoint(const union sctp_addr *laddr)
+static struct sctp_endpoint *__sctp_rcv_lookup_endpoint(struct net *net,
+						const union sctp_addr *laddr)
 {
 	struct sctp_hashbucket *head;
 	struct sctp_ep_common *epb;
@@ -778,12 +783,12 @@ static struct sctp_endpoint *__sctp_rcv_lookup_endpoint(const union sctp_addr *l
 	struct hlist_node *node;
 	int hash;
 
-	hash = sctp_ep_hashfn(ntohs(laddr->v4.sin_port));
+	hash = sctp_ep_hashfn(net, ntohs(laddr->v4.sin_port));
 	head = &sctp_ep_hashtable[hash];
 	read_lock(&head->lock);
 	sctp_for_each_hentry(epb, node, &head->chain) {
 		ep = sctp_ep(epb);
-		if (sctp_endpoint_is_match(ep, laddr))
+		if (sctp_endpoint_is_match(ep, net, laddr))
 			goto hit;
 	}
 
-- 
cgit v1.2.3


From 4110cc255ddec59c79fba4d71cdd948d0a382140 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Mon, 6 Aug 2012 08:41:13 +0000
Subject: sctp: Make the association hashtable handle multiple network
 namespaces

- Use struct net in the hash calculation
- Use sock_net(association.base.sk) in the association lookups.
- On receive calculate the network namespace from skb->dev.
- Pass struct net from receive down to the functions that actually
  do the association lookup.

Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
Acked-by: Vlad Yasevich <vyasevich@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/sctp.h    |  6 ++---
 include/net/sctp/structs.h |  3 ++-
 net/sctp/associola.c       |  4 ++-
 net/sctp/endpointola.c     |  6 +++--
 net/sctp/input.c           | 64 +++++++++++++++++++++++++++++-----------------
 net/sctp/ipv6.c            |  3 ++-
 6 files changed, 54 insertions(+), 32 deletions(-)

(limited to 'include')

diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
index 87b119f74c4a..640915a0613d 100644
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@ -156,7 +156,7 @@ void sctp_hash_established(struct sctp_association *);
 void sctp_unhash_established(struct sctp_association *);
 void sctp_hash_endpoint(struct sctp_endpoint *);
 void sctp_unhash_endpoint(struct sctp_endpoint *);
-struct sock *sctp_err_lookup(int family, struct sk_buff *,
+struct sock *sctp_err_lookup(struct net *net, int family, struct sk_buff *,
 			     struct sctphdr *, struct sctp_association **,
 			     struct sctp_transport **);
 void sctp_err_finish(struct sock *, struct sctp_association *);
@@ -644,9 +644,9 @@ static inline int sctp_ep_hashfn(struct net *net, __u16 lport)
 }
 
 /* This is the hash function for the association hash table. */
-static inline int sctp_assoc_hashfn(__u16 lport, __u16 rport)
+static inline int sctp_assoc_hashfn(struct net *net, __u16 lport, __u16 rport)
 {
-	int h = (lport << 16) + rport;
+	int h = (lport << 16) + rport + net_hash_mix(net);
 	h ^= h>>8;
 	return h & (sctp_assoc_hashsize - 1);
 }
diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 9f9de558541f..c0563d1dd7c7 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -1427,7 +1427,7 @@ int sctp_endpoint_is_peeled_off(struct sctp_endpoint *,
 				const union sctp_addr *);
 struct sctp_endpoint *sctp_endpoint_is_match(struct sctp_endpoint *,
 					struct net *, const union sctp_addr *);
-int sctp_has_association(const union sctp_addr *laddr,
+int sctp_has_association(struct net *net, const union sctp_addr *laddr,
 			 const union sctp_addr *paddr);
 
 int sctp_verify_init(const struct sctp_association *asoc, sctp_cid_t,
@@ -2014,6 +2014,7 @@ void sctp_assoc_control_transport(struct sctp_association *,
 				  sctp_transport_cmd_t, sctp_sn_error_t);
 struct sctp_transport *sctp_assoc_lookup_tsn(struct sctp_association *, __u32);
 struct sctp_transport *sctp_assoc_is_match(struct sctp_association *,
+					   struct net *,
 					   const union sctp_addr *,
 					   const union sctp_addr *);
 void sctp_assoc_migrate(struct sctp_association *, struct sock *);
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index ebaef3ed6065..a3601f35ac15 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -1089,13 +1089,15 @@ out:
 
 /* Is this the association we are looking for? */
 struct sctp_transport *sctp_assoc_is_match(struct sctp_association *asoc,
+					   struct net *net,
 					   const union sctp_addr *laddr,
 					   const union sctp_addr *paddr)
 {
 	struct sctp_transport *transport;
 
 	if ((htons(asoc->base.bind_addr.port) == laddr->v4.sin_port) &&
-	    (htons(asoc->peer.port) == paddr->v4.sin_port)) {
+	    (htons(asoc->peer.port) == paddr->v4.sin_port) &&
+	    net_eq(sock_net(asoc->base.sk), net)) {
 		transport = sctp_assoc_lookup_paddr(asoc, paddr);
 		if (!transport)
 			goto out;
diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c
index 50c87b4ad765..6b763939345b 100644
--- a/net/sctp/endpointola.c
+++ b/net/sctp/endpointola.c
@@ -345,7 +345,8 @@ static struct sctp_association *__sctp_endpoint_lookup_assoc(
 
 	rport = ntohs(paddr->v4.sin_port);
 
-	hash = sctp_assoc_hashfn(ep->base.bind_addr.port, rport);
+	hash = sctp_assoc_hashfn(sock_net(ep->base.sk), ep->base.bind_addr.port,
+				 rport);
 	head = &sctp_assoc_hashtable[hash];
 	read_lock(&head->lock);
 	sctp_for_each_hentry(epb, node, &head->chain) {
@@ -388,13 +389,14 @@ int sctp_endpoint_is_peeled_off(struct sctp_endpoint *ep,
 {
 	struct sctp_sockaddr_entry *addr;
 	struct sctp_bind_addr *bp;
+	struct net *net = sock_net(ep->base.sk);
 
 	bp = &ep->base.bind_addr;
 	/* This function is called with the socket lock held,
 	 * so the address_list can not change.
 	 */
 	list_for_each_entry(addr, &bp->address_list, list) {
-		if (sctp_has_association(&addr->a, paddr))
+		if (sctp_has_association(net, &addr->a, paddr))
 			return 1;
 	}
 
diff --git a/net/sctp/input.c b/net/sctp/input.c
index c0ca893ab1d1..a7e9a85b5acb 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -66,13 +66,15 @@
 
 /* Forward declarations for internal helpers. */
 static int sctp_rcv_ootb(struct sk_buff *);
-static struct sctp_association *__sctp_rcv_lookup(struct sk_buff *skb,
+static struct sctp_association *__sctp_rcv_lookup(struct net *net,
+				      struct sk_buff *skb,
 				      const union sctp_addr *laddr,
 				      const union sctp_addr *paddr,
 				      struct sctp_transport **transportp);
 static struct sctp_endpoint *__sctp_rcv_lookup_endpoint(struct net *net,
 						const union sctp_addr *laddr);
 static struct sctp_association *__sctp_lookup_association(
+					struct net *net,
 					const union sctp_addr *local,
 					const union sctp_addr *peer,
 					struct sctp_transport **pt);
@@ -180,7 +182,7 @@ int sctp_rcv(struct sk_buff *skb)
 	    !af->addr_valid(&dest, NULL, skb))
 		goto discard_it;
 
-	asoc = __sctp_rcv_lookup(skb, &src, &dest, &transport);
+	asoc = __sctp_rcv_lookup(net, skb, &src, &dest, &transport);
 
 	if (!asoc)
 		ep = __sctp_rcv_lookup_endpoint(net, &dest);
@@ -476,7 +478,7 @@ void sctp_icmp_proto_unreachable(struct sock *sk,
 }
 
 /* Common lookup code for icmp/icmpv6 error handler. */
-struct sock *sctp_err_lookup(int family, struct sk_buff *skb,
+struct sock *sctp_err_lookup(struct net *net, int family, struct sk_buff *skb,
 			     struct sctphdr *sctphdr,
 			     struct sctp_association **app,
 			     struct sctp_transport **tpp)
@@ -505,7 +507,7 @@ struct sock *sctp_err_lookup(int family, struct sk_buff *skb,
 	/* Look for an association that matches the incoming ICMP error
 	 * packet.
 	 */
-	asoc = __sctp_lookup_association(&saddr, &daddr, &transport);
+	asoc = __sctp_lookup_association(net, &saddr, &daddr, &transport);
 	if (!asoc)
 		return NULL;
 
@@ -588,6 +590,7 @@ void sctp_v4_err(struct sk_buff *skb, __u32 info)
 	struct inet_sock *inet;
 	sk_buff_data_t saveip, savesctp;
 	int err;
+	struct net *net = dev_net(skb->dev);
 
 	if (skb->len < ihlen + 8) {
 		ICMP_INC_STATS_BH(&init_net, ICMP_MIB_INERRORS);
@@ -599,7 +602,7 @@ void sctp_v4_err(struct sk_buff *skb, __u32 info)
 	savesctp = skb->transport_header;
 	skb_reset_network_header(skb);
 	skb_set_transport_header(skb, ihlen);
-	sk = sctp_err_lookup(AF_INET, skb, sctp_hdr(skb), &asoc, &transport);
+	sk = sctp_err_lookup(net, AF_INET, skb, sctp_hdr(skb), &asoc, &transport);
 	/* Put back, the original values. */
 	skb->network_header = saveip;
 	skb->transport_header = savesctp;
@@ -803,13 +806,15 @@ hit:
 /* Insert association into the hash table.  */
 static void __sctp_hash_established(struct sctp_association *asoc)
 {
+	struct net *net = sock_net(asoc->base.sk);
 	struct sctp_ep_common *epb;
 	struct sctp_hashbucket *head;
 
 	epb = &asoc->base;
 
 	/* Calculate which chain this entry will belong to. */
-	epb->hashent = sctp_assoc_hashfn(epb->bind_addr.port, asoc->peer.port);
+	epb->hashent = sctp_assoc_hashfn(net, epb->bind_addr.port,
+					 asoc->peer.port);
 
 	head = &sctp_assoc_hashtable[epb->hashent];
 
@@ -832,12 +837,13 @@ void sctp_hash_established(struct sctp_association *asoc)
 /* Remove association from the hash table.  */
 static void __sctp_unhash_established(struct sctp_association *asoc)
 {
+	struct net *net = sock_net(asoc->base.sk);
 	struct sctp_hashbucket *head;
 	struct sctp_ep_common *epb;
 
 	epb = &asoc->base;
 
-	epb->hashent = sctp_assoc_hashfn(epb->bind_addr.port,
+	epb->hashent = sctp_assoc_hashfn(net, epb->bind_addr.port,
 					 asoc->peer.port);
 
 	head = &sctp_assoc_hashtable[epb->hashent];
@@ -860,6 +866,7 @@ void sctp_unhash_established(struct sctp_association *asoc)
 
 /* Look up an association. */
 static struct sctp_association *__sctp_lookup_association(
+					struct net *net,
 					const union sctp_addr *local,
 					const union sctp_addr *peer,
 					struct sctp_transport **pt)
@@ -874,12 +881,13 @@ static struct sctp_association *__sctp_lookup_association(
 	/* Optimize here for direct hit, only listening connections can
 	 * have wildcards anyways.
 	 */
-	hash = sctp_assoc_hashfn(ntohs(local->v4.sin_port), ntohs(peer->v4.sin_port));
+	hash = sctp_assoc_hashfn(net, ntohs(local->v4.sin_port),
+				 ntohs(peer->v4.sin_port));
 	head = &sctp_assoc_hashtable[hash];
 	read_lock(&head->lock);
 	sctp_for_each_hentry(epb, node, &head->chain) {
 		asoc = sctp_assoc(epb);
-		transport = sctp_assoc_is_match(asoc, local, peer);
+		transport = sctp_assoc_is_match(asoc, net, local, peer);
 		if (transport)
 			goto hit;
 	}
@@ -897,27 +905,29 @@ hit:
 
 /* Look up an association. BH-safe. */
 SCTP_STATIC
-struct sctp_association *sctp_lookup_association(const union sctp_addr *laddr,
+struct sctp_association *sctp_lookup_association(struct net *net,
+						 const union sctp_addr *laddr,
 						 const union sctp_addr *paddr,
 					    struct sctp_transport **transportp)
 {
 	struct sctp_association *asoc;
 
 	sctp_local_bh_disable();
-	asoc = __sctp_lookup_association(laddr, paddr, transportp);
+	asoc = __sctp_lookup_association(net, laddr, paddr, transportp);
 	sctp_local_bh_enable();
 
 	return asoc;
 }
 
 /* Is there an association matching the given local and peer addresses? */
-int sctp_has_association(const union sctp_addr *laddr,
+int sctp_has_association(struct net *net,
+			 const union sctp_addr *laddr,
 			 const union sctp_addr *paddr)
 {
 	struct sctp_association *asoc;
 	struct sctp_transport *transport;
 
-	if ((asoc = sctp_lookup_association(laddr, paddr, &transport))) {
+	if ((asoc = sctp_lookup_association(net, laddr, paddr, &transport))) {
 		sctp_association_put(asoc);
 		return 1;
 	}
@@ -943,7 +953,8 @@ int sctp_has_association(const union sctp_addr *laddr,
  * in certain circumstances.
  *
  */
-static struct sctp_association *__sctp_rcv_init_lookup(struct sk_buff *skb,
+static struct sctp_association *__sctp_rcv_init_lookup(struct net *net,
+	struct sk_buff *skb,
 	const union sctp_addr *laddr, struct sctp_transport **transportp)
 {
 	struct sctp_association *asoc;
@@ -983,7 +994,7 @@ static struct sctp_association *__sctp_rcv_init_lookup(struct sk_buff *skb,
 
 		af->from_addr_param(paddr, params.addr, sh->source, 0);
 
-		asoc = __sctp_lookup_association(laddr, paddr, &transport);
+		asoc = __sctp_lookup_association(net, laddr, paddr, &transport);
 		if (asoc)
 			return asoc;
 	}
@@ -1006,6 +1017,7 @@ static struct sctp_association *__sctp_rcv_init_lookup(struct sk_buff *skb,
  * subsequent ASCONF Chunks. If found, proceed to rule D4.
  */
 static struct sctp_association *__sctp_rcv_asconf_lookup(
+					struct net *net,
 					sctp_chunkhdr_t *ch,
 					const union sctp_addr *laddr,
 					__be16 peer_port,
@@ -1025,7 +1037,7 @@ static struct sctp_association *__sctp_rcv_asconf_lookup(
 
 	af->from_addr_param(&paddr, param, peer_port, 0);
 
-	return __sctp_lookup_association(laddr, &paddr, transportp);
+	return __sctp_lookup_association(net, laddr, &paddr, transportp);
 }
 
 
@@ -1038,7 +1050,8 @@ static struct sctp_association *__sctp_rcv_asconf_lookup(
 * This means that any chunks that can help us identify the association need
 * to be looked at to find this association.
 */
-static struct sctp_association *__sctp_rcv_walk_lookup(struct sk_buff *skb,
+static struct sctp_association *__sctp_rcv_walk_lookup(struct net *net,
+				      struct sk_buff *skb,
 				      const union sctp_addr *laddr,
 				      struct sctp_transport **transportp)
 {
@@ -1080,7 +1093,8 @@ static struct sctp_association *__sctp_rcv_walk_lookup(struct sk_buff *skb,
 
 		    case SCTP_CID_ASCONF:
 			    if (have_auth || sctp_addip_noauth)
-				    asoc = __sctp_rcv_asconf_lookup(ch, laddr,
+				    asoc = __sctp_rcv_asconf_lookup(
+							net, ch, laddr,
 							sctp_hdr(skb)->source,
 							transportp);
 		    default:
@@ -1103,7 +1117,8 @@ static struct sctp_association *__sctp_rcv_walk_lookup(struct sk_buff *skb,
  * include looking inside of INIT/INIT-ACK chunks or after the AUTH
  * chunks.
  */
-static struct sctp_association *__sctp_rcv_lookup_harder(struct sk_buff *skb,
+static struct sctp_association *__sctp_rcv_lookup_harder(struct net *net,
+				      struct sk_buff *skb,
 				      const union sctp_addr *laddr,
 				      struct sctp_transport **transportp)
 {
@@ -1123,11 +1138,11 @@ static struct sctp_association *__sctp_rcv_lookup_harder(struct sk_buff *skb,
 	switch (ch->type) {
 	case SCTP_CID_INIT:
 	case SCTP_CID_INIT_ACK:
-		return __sctp_rcv_init_lookup(skb, laddr, transportp);
+		return __sctp_rcv_init_lookup(net, skb, laddr, transportp);
 		break;
 
 	default:
-		return __sctp_rcv_walk_lookup(skb, laddr, transportp);
+		return __sctp_rcv_walk_lookup(net, skb, laddr, transportp);
 		break;
 	}
 
@@ -1136,21 +1151,22 @@ static struct sctp_association *__sctp_rcv_lookup_harder(struct sk_buff *skb,
 }
 
 /* Lookup an association for an inbound skb. */
-static struct sctp_association *__sctp_rcv_lookup(struct sk_buff *skb,
+static struct sctp_association *__sctp_rcv_lookup(struct net *net,
+				      struct sk_buff *skb,
 				      const union sctp_addr *paddr,
 				      const union sctp_addr *laddr,
 				      struct sctp_transport **transportp)
 {
 	struct sctp_association *asoc;
 
-	asoc = __sctp_lookup_association(laddr, paddr, transportp);
+	asoc = __sctp_lookup_association(net, laddr, paddr, transportp);
 
 	/* Further lookup for INIT/INIT-ACK packets.
 	 * SCTP Implementors Guide, 2.18 Handling of address
 	 * parameters within the INIT or INIT-ACK.
 	 */
 	if (!asoc)
-		asoc = __sctp_rcv_lookup_harder(skb, laddr, transportp);
+		asoc = __sctp_rcv_lookup_harder(net, skb, laddr, transportp);
 
 	return asoc;
 }
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index ed7139ea7978..2165a7ed25f1 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -154,6 +154,7 @@ SCTP_STATIC void sctp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 	struct ipv6_pinfo *np;
 	sk_buff_data_t saveip, savesctp;
 	int err;
+	struct net *net = dev_net(skb->dev);
 
 	idev = in6_dev_get(skb->dev);
 
@@ -162,7 +163,7 @@ SCTP_STATIC void sctp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 	savesctp = skb->transport_header;
 	skb_reset_network_header(skb);
 	skb_set_transport_header(skb, offset);
-	sk = sctp_err_lookup(AF_INET6, skb, sctp_hdr(skb), &asoc, &transport);
+	sk = sctp_err_lookup(net, AF_INET6, skb, sctp_hdr(skb), &asoc, &transport);
 	/* Put back, the original pointers. */
 	skb->network_header   = saveip;
 	skb->transport_header = savesctp;
-- 
cgit v1.2.3


From 4db67e808640e3934d82ce61ee8e2e89fd877ba8 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Mon, 6 Aug 2012 08:42:04 +0000
Subject: sctp: Make the address lists per network namespace

- Move the address lists into struct net
- Add per network namespace initialization and cleanup
- Pass around struct net so it is everywhere I need it.
- Rename all of the global variable references into references
  to the variables moved into struct net

Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
Acked-by: Vlad Yasevich <vyasevich@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/net_namespace.h |   4 ++
 include/net/netns/sctp.h    |  21 +++++++
 include/net/sctp/sctp.h     |   4 +-
 include/net/sctp/structs.h  |  22 +------
 net/sctp/associola.c        |   3 +-
 net/sctp/bind_addr.c        |  14 ++---
 net/sctp/ipv6.c             |  17 +++---
 net/sctp/protocol.c         | 143 +++++++++++++++++++++++++-------------------
 net/sctp/socket.c           |   7 ++-
 9 files changed, 132 insertions(+), 103 deletions(-)
 create mode 100644 include/net/netns/sctp.h

(limited to 'include')

diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index 97e441945c13..5ae57f1ab755 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -15,6 +15,7 @@
 #include <net/netns/packet.h>
 #include <net/netns/ipv4.h>
 #include <net/netns/ipv6.h>
+#include <net/netns/sctp.h>
 #include <net/netns/dccp.h>
 #include <net/netns/x_tables.h>
 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
@@ -81,6 +82,9 @@ struct net {
 #if IS_ENABLED(CONFIG_IPV6)
 	struct netns_ipv6	ipv6;
 #endif
+#if defined(CONFIG_IP_SCTP) || defined(CONFIG_IP_SCTP_MODULE)
+	struct netns_sctp	sctp;
+#endif
 #if defined(CONFIG_IP_DCCP) || defined(CONFIG_IP_DCCP_MODULE)
 	struct netns_dccp	dccp;
 #endif
diff --git a/include/net/netns/sctp.h b/include/net/netns/sctp.h
new file mode 100644
index 000000000000..cbd684e01cf7
--- /dev/null
+++ b/include/net/netns/sctp.h
@@ -0,0 +1,21 @@
+#ifndef __NETNS_SCTP_H__
+#define __NETNS_SCTP_H__
+
+struct netns_sctp {
+	/* This is the global local address list.
+	 * We actively maintain this complete list of addresses on
+	 * the system by catching address add/delete events.
+	 *
+	 * It is a list of sctp_sockaddr_entry.
+	 */
+	struct list_head local_addr_list;
+	struct list_head addr_waitq;
+	struct timer_list addr_wq_timer;
+	struct list_head auto_asconf_splist;
+	spinlock_t addr_wq_lock;
+
+	/* Lock that protects the local_addr_list writers */
+	spinlock_t local_addr_lock;
+};
+
+#endif /* __NETNS_SCTP_H__ */
diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
index 640915a0613d..00c920537d4a 100644
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@ -115,12 +115,12 @@
  * sctp/protocol.c
  */
 extern struct sock *sctp_get_ctl_sock(void);
-extern int sctp_copy_local_addr_list(struct sctp_bind_addr *,
+extern int sctp_copy_local_addr_list(struct net *, struct sctp_bind_addr *,
 				     sctp_scope_t, gfp_t gfp,
 				     int flags);
 extern struct sctp_pf *sctp_get_pf_specific(sa_family_t family);
 extern int sctp_register_pf(struct sctp_pf *, sa_family_t);
-extern void sctp_addr_wq_mgmt(struct sctp_sockaddr_entry *, int);
+extern void sctp_addr_wq_mgmt(struct net *, struct sctp_sockaddr_entry *, int);
 
 /*
  * sctp/socket.c
diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index c0563d1dd7c7..6bdfcabe560e 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -205,21 +205,7 @@ extern struct sctp_globals {
 	int port_hashsize;
 	struct sctp_bind_hashbucket *port_hashtable;
 
-	/* This is the global local address list.
-	 * We actively maintain this complete list of addresses on
-	 * the system by catching address add/delete events.
-	 *
-	 * It is a list of sctp_sockaddr_entry.
-	 */
-	struct list_head local_addr_list;
 	int default_auto_asconf;
-	struct list_head addr_waitq;
-	struct timer_list addr_wq_timer;
-	struct list_head auto_asconf_splist;
-	spinlock_t addr_wq_lock;
-
-	/* Lock that protects the local_addr_list writers */
-	spinlock_t addr_list_lock;
 	
 	/* Flag to indicate if addip is enabled. */
 	int addip_enable;
@@ -278,12 +264,6 @@ extern struct sctp_globals {
 #define sctp_assoc_hashtable		(sctp_globals.assoc_hashtable)
 #define sctp_port_hashsize		(sctp_globals.port_hashsize)
 #define sctp_port_hashtable		(sctp_globals.port_hashtable)
-#define sctp_local_addr_list		(sctp_globals.local_addr_list)
-#define sctp_local_addr_lock		(sctp_globals.addr_list_lock)
-#define sctp_auto_asconf_splist		(sctp_globals.auto_asconf_splist)
-#define sctp_addr_waitq			(sctp_globals.addr_waitq)
-#define sctp_addr_wq_timer		(sctp_globals.addr_wq_timer)
-#define sctp_addr_wq_lock		(sctp_globals.addr_wq_lock)
 #define sctp_default_auto_asconf	(sctp_globals.default_auto_asconf)
 #define sctp_scope_policy		(sctp_globals.ipv4_scope_policy)
 #define sctp_addip_enable		(sctp_globals.addip_enable)
@@ -1241,7 +1221,7 @@ struct sctp_bind_addr {
 
 void sctp_bind_addr_init(struct sctp_bind_addr *, __u16 port);
 void sctp_bind_addr_free(struct sctp_bind_addr *);
-int sctp_bind_addr_copy(struct sctp_bind_addr *dest,
+int sctp_bind_addr_copy(struct net *net, struct sctp_bind_addr *dest,
 			const struct sctp_bind_addr *src,
 			sctp_scope_t scope, gfp_t gfp,
 			int flags);
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index a3601f35ac15..ed4930b31341 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -1544,7 +1544,8 @@ int sctp_assoc_set_bind_addr_from_ep(struct sctp_association *asoc,
 	if (asoc->peer.ipv6_address)
 		flags |= SCTP_ADDR6_PEERSUPP;
 
-	return sctp_bind_addr_copy(&asoc->base.bind_addr,
+	return sctp_bind_addr_copy(sock_net(asoc->base.sk),
+				   &asoc->base.bind_addr,
 				   &asoc->ep->base.bind_addr,
 				   scope, gfp, flags);
 }
diff --git a/net/sctp/bind_addr.c b/net/sctp/bind_addr.c
index 4ece451c8d27..a85ce4b3e574 100644
--- a/net/sctp/bind_addr.c
+++ b/net/sctp/bind_addr.c
@@ -52,8 +52,8 @@
 #include <net/sctp/sm.h>
 
 /* Forward declarations for internal helpers. */
-static int sctp_copy_one_addr(struct sctp_bind_addr *, union sctp_addr *,
-			      sctp_scope_t scope, gfp_t gfp,
+static int sctp_copy_one_addr(struct net *, struct sctp_bind_addr *,
+			      union sctp_addr *, sctp_scope_t scope, gfp_t gfp,
 			      int flags);
 static void sctp_bind_addr_clean(struct sctp_bind_addr *);
 
@@ -62,7 +62,7 @@ static void sctp_bind_addr_clean(struct sctp_bind_addr *);
 /* Copy 'src' to 'dest' taking 'scope' into account.  Omit addresses
  * in 'src' which have a broader scope than 'scope'.
  */
-int sctp_bind_addr_copy(struct sctp_bind_addr *dest,
+int sctp_bind_addr_copy(struct net *net, struct sctp_bind_addr *dest,
 			const struct sctp_bind_addr *src,
 			sctp_scope_t scope, gfp_t gfp,
 			int flags)
@@ -75,7 +75,7 @@ int sctp_bind_addr_copy(struct sctp_bind_addr *dest,
 
 	/* Extract the addresses which are relevant for this scope.  */
 	list_for_each_entry(addr, &src->address_list, list) {
-		error = sctp_copy_one_addr(dest, &addr->a, scope,
+		error = sctp_copy_one_addr(net, dest, &addr->a, scope,
 					   gfp, flags);
 		if (error < 0)
 			goto out;
@@ -87,7 +87,7 @@ int sctp_bind_addr_copy(struct sctp_bind_addr *dest,
 	 */
 	if (list_empty(&dest->address_list) && (SCTP_SCOPE_GLOBAL == scope)) {
 		list_for_each_entry(addr, &src->address_list, list) {
-			error = sctp_copy_one_addr(dest, &addr->a,
+			error = sctp_copy_one_addr(net, dest, &addr->a,
 						   SCTP_SCOPE_LINK, gfp,
 						   flags);
 			if (error < 0)
@@ -448,7 +448,7 @@ union sctp_addr *sctp_find_unmatch_addr(struct sctp_bind_addr	*bp,
 }
 
 /* Copy out addresses from the global local address list. */
-static int sctp_copy_one_addr(struct sctp_bind_addr *dest,
+static int sctp_copy_one_addr(struct net *net, struct sctp_bind_addr *dest,
 			      union sctp_addr *addr,
 			      sctp_scope_t scope, gfp_t gfp,
 			      int flags)
@@ -456,7 +456,7 @@ static int sctp_copy_one_addr(struct sctp_bind_addr *dest,
 	int error = 0;
 
 	if (sctp_is_any(NULL, addr)) {
-		error = sctp_copy_local_addr_list(dest, scope, gfp, flags);
+		error = sctp_copy_local_addr_list(net, dest, scope, gfp, flags);
 	} else if (sctp_in_scope(addr, scope)) {
 		/* Now that the address is in scope, check to see if
 		 * the address type is supported by local sock as
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index 2165a7ed25f1..bbf15341eb2b 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -99,6 +99,7 @@ static int sctp_inet6addr_event(struct notifier_block *this, unsigned long ev,
 	struct inet6_ifaddr *ifa = (struct inet6_ifaddr *)ptr;
 	struct sctp_sockaddr_entry *addr = NULL;
 	struct sctp_sockaddr_entry *temp;
+	struct net *net = dev_net(ifa->idev->dev);
 	int found = 0;
 
 	switch (ev) {
@@ -110,27 +111,27 @@ static int sctp_inet6addr_event(struct notifier_block *this, unsigned long ev,
 			addr->a.v6.sin6_addr = ifa->addr;
 			addr->a.v6.sin6_scope_id = ifa->idev->dev->ifindex;
 			addr->valid = 1;
-			spin_lock_bh(&sctp_local_addr_lock);
-			list_add_tail_rcu(&addr->list, &sctp_local_addr_list);
-			sctp_addr_wq_mgmt(addr, SCTP_ADDR_NEW);
-			spin_unlock_bh(&sctp_local_addr_lock);
+			spin_lock_bh(&net->sctp.local_addr_lock);
+			list_add_tail_rcu(&addr->list, &net->sctp.local_addr_list);
+			sctp_addr_wq_mgmt(net, addr, SCTP_ADDR_NEW);
+			spin_unlock_bh(&net->sctp.local_addr_lock);
 		}
 		break;
 	case NETDEV_DOWN:
-		spin_lock_bh(&sctp_local_addr_lock);
+		spin_lock_bh(&net->sctp.local_addr_lock);
 		list_for_each_entry_safe(addr, temp,
-					&sctp_local_addr_list, list) {
+					&net->sctp.local_addr_list, list) {
 			if (addr->a.sa.sa_family == AF_INET6 &&
 					ipv6_addr_equal(&addr->a.v6.sin6_addr,
 						&ifa->addr)) {
-				sctp_addr_wq_mgmt(addr, SCTP_ADDR_DEL);
+				sctp_addr_wq_mgmt(net, addr, SCTP_ADDR_DEL);
 				found = 1;
 				addr->valid = 0;
 				list_del_rcu(&addr->list);
 				break;
 			}
 		}
-		spin_unlock_bh(&sctp_local_addr_lock);
+		spin_unlock_bh(&net->sctp.local_addr_lock);
 		if (found)
 			kfree_rcu(addr, rcu);
 		break;
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 1f89c4e69645..7025d96bae5f 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -201,29 +201,29 @@ static void sctp_v4_copy_addrlist(struct list_head *addrlist,
 /* Extract our IP addresses from the system and stash them in the
  * protocol structure.
  */
-static void sctp_get_local_addr_list(void)
+static void sctp_get_local_addr_list(struct net *net)
 {
 	struct net_device *dev;
 	struct list_head *pos;
 	struct sctp_af *af;
 
 	rcu_read_lock();
-	for_each_netdev_rcu(&init_net, dev) {
+	for_each_netdev_rcu(net, dev) {
 		__list_for_each(pos, &sctp_address_families) {
 			af = list_entry(pos, struct sctp_af, list);
-			af->copy_addrlist(&sctp_local_addr_list, dev);
+			af->copy_addrlist(&net->sctp.local_addr_list, dev);
 		}
 	}
 	rcu_read_unlock();
 }
 
 /* Free the existing local addresses.  */
-static void sctp_free_local_addr_list(void)
+static void sctp_free_local_addr_list(struct net *net)
 {
 	struct sctp_sockaddr_entry *addr;
 	struct list_head *pos, *temp;
 
-	list_for_each_safe(pos, temp, &sctp_local_addr_list) {
+	list_for_each_safe(pos, temp, &net->sctp.local_addr_list) {
 		addr = list_entry(pos, struct sctp_sockaddr_entry, list);
 		list_del(pos);
 		kfree(addr);
@@ -231,14 +231,14 @@ static void sctp_free_local_addr_list(void)
 }
 
 /* Copy the local addresses which are valid for 'scope' into 'bp'.  */
-int sctp_copy_local_addr_list(struct sctp_bind_addr *bp, sctp_scope_t scope,
-			      gfp_t gfp, int copy_flags)
+int sctp_copy_local_addr_list(struct net *net, struct sctp_bind_addr *bp,
+			      sctp_scope_t scope, gfp_t gfp, int copy_flags)
 {
 	struct sctp_sockaddr_entry *addr;
 	int error = 0;
 
 	rcu_read_lock();
-	list_for_each_entry_rcu(addr, &sctp_local_addr_list, list) {
+	list_for_each_entry_rcu(addr, &net->sctp.local_addr_list, list) {
 		if (!addr->valid)
 			continue;
 		if (sctp_in_scope(&addr->a, scope)) {
@@ -627,14 +627,15 @@ static void sctp_v4_ecn_capable(struct sock *sk)
 
 void sctp_addr_wq_timeout_handler(unsigned long arg)
 {
+	struct net *net = (struct net *)arg;
 	struct sctp_sockaddr_entry *addrw, *temp;
 	struct sctp_sock *sp;
 
-	spin_lock_bh(&sctp_addr_wq_lock);
+	spin_lock_bh(&net->sctp.addr_wq_lock);
 
-	list_for_each_entry_safe(addrw, temp, &sctp_addr_waitq, list) {
+	list_for_each_entry_safe(addrw, temp, &net->sctp.addr_waitq, list) {
 		SCTP_DEBUG_PRINTK_IPADDR("sctp_addrwq_timo_handler: the first ent in wq %p is ",
-		    " for cmd %d at entry %p\n", &sctp_addr_waitq, &addrw->a, addrw->state,
+		    " for cmd %d at entry %p\n", &net->sctp.addr_waitq, &addrw->a, addrw->state,
 		    addrw);
 
 #if IS_ENABLED(CONFIG_IPV6)
@@ -648,7 +649,7 @@ void sctp_addr_wq_timeout_handler(unsigned long arg)
 				goto free_next;
 
 			in6 = (struct in6_addr *)&addrw->a.v6.sin6_addr;
-			if (ipv6_chk_addr(&init_net, in6, NULL, 0) == 0 &&
+			if (ipv6_chk_addr(net, in6, NULL, 0) == 0 &&
 			    addrw->state == SCTP_ADDR_NEW) {
 				unsigned long timeo_val;
 
@@ -656,12 +657,12 @@ void sctp_addr_wq_timeout_handler(unsigned long arg)
 				    SCTP_ADDRESS_TICK_DELAY);
 				timeo_val = jiffies;
 				timeo_val += msecs_to_jiffies(SCTP_ADDRESS_TICK_DELAY);
-				mod_timer(&sctp_addr_wq_timer, timeo_val);
+				mod_timer(&net->sctp.addr_wq_timer, timeo_val);
 				break;
 			}
 		}
 #endif
-		list_for_each_entry(sp, &sctp_auto_asconf_splist, auto_asconf_list) {
+		list_for_each_entry(sp, &net->sctp.auto_asconf_splist, auto_asconf_list) {
 			struct sock *sk;
 
 			sk = sctp_opt2sk(sp);
@@ -679,31 +680,32 @@ free_next:
 		list_del(&addrw->list);
 		kfree(addrw);
 	}
-	spin_unlock_bh(&sctp_addr_wq_lock);
+	spin_unlock_bh(&net->sctp.addr_wq_lock);
 }
 
-static void sctp_free_addr_wq(void)
+static void sctp_free_addr_wq(struct net *net)
 {
 	struct sctp_sockaddr_entry *addrw;
 	struct sctp_sockaddr_entry *temp;
 
-	spin_lock_bh(&sctp_addr_wq_lock);
-	del_timer(&sctp_addr_wq_timer);
-	list_for_each_entry_safe(addrw, temp, &sctp_addr_waitq, list) {
+	spin_lock_bh(&net->sctp.addr_wq_lock);
+	del_timer(&net->sctp.addr_wq_timer);
+	list_for_each_entry_safe(addrw, temp, &net->sctp.addr_waitq, list) {
 		list_del(&addrw->list);
 		kfree(addrw);
 	}
-	spin_unlock_bh(&sctp_addr_wq_lock);
+	spin_unlock_bh(&net->sctp.addr_wq_lock);
 }
 
 /* lookup the entry for the same address in the addr_waitq
  * sctp_addr_wq MUST be locked
  */
-static struct sctp_sockaddr_entry *sctp_addr_wq_lookup(struct sctp_sockaddr_entry *addr)
+static struct sctp_sockaddr_entry *sctp_addr_wq_lookup(struct net *net,
+					struct sctp_sockaddr_entry *addr)
 {
 	struct sctp_sockaddr_entry *addrw;
 
-	list_for_each_entry(addrw, &sctp_addr_waitq, list) {
+	list_for_each_entry(addrw, &net->sctp.addr_waitq, list) {
 		if (addrw->a.sa.sa_family != addr->a.sa.sa_family)
 			continue;
 		if (addrw->a.sa.sa_family == AF_INET) {
@@ -719,7 +721,7 @@ static struct sctp_sockaddr_entry *sctp_addr_wq_lookup(struct sctp_sockaddr_entr
 	return NULL;
 }
 
-void sctp_addr_wq_mgmt(struct sctp_sockaddr_entry *addr, int cmd)
+void sctp_addr_wq_mgmt(struct net *net, struct sctp_sockaddr_entry *addr, int cmd)
 {
 	struct sctp_sockaddr_entry *addrw;
 	unsigned long timeo_val;
@@ -730,38 +732,38 @@ void sctp_addr_wq_mgmt(struct sctp_sockaddr_entry *addr, int cmd)
 	 * new address after a couple of addition and deletion of that address
 	 */
 
-	spin_lock_bh(&sctp_addr_wq_lock);
+	spin_lock_bh(&net->sctp.addr_wq_lock);
 	/* Offsets existing events in addr_wq */
-	addrw = sctp_addr_wq_lookup(addr);
+	addrw = sctp_addr_wq_lookup(net, addr);
 	if (addrw) {
 		if (addrw->state != cmd) {
 			SCTP_DEBUG_PRINTK_IPADDR("sctp_addr_wq_mgmt offsets existing entry for %d ",
 			    " in wq %p\n", addrw->state, &addrw->a,
-			    &sctp_addr_waitq);
+			    &net->sctp.addr_waitq);
 			list_del(&addrw->list);
 			kfree(addrw);
 		}
-		spin_unlock_bh(&sctp_addr_wq_lock);
+		spin_unlock_bh(&net->sctp.addr_wq_lock);
 		return;
 	}
 
 	/* OK, we have to add the new address to the wait queue */
 	addrw = kmemdup(addr, sizeof(struct sctp_sockaddr_entry), GFP_ATOMIC);
 	if (addrw == NULL) {
-		spin_unlock_bh(&sctp_addr_wq_lock);
+		spin_unlock_bh(&net->sctp.addr_wq_lock);
 		return;
 	}
 	addrw->state = cmd;
-	list_add_tail(&addrw->list, &sctp_addr_waitq);
+	list_add_tail(&addrw->list, &net->sctp.addr_waitq);
 	SCTP_DEBUG_PRINTK_IPADDR("sctp_addr_wq_mgmt add new entry for cmd:%d ",
-	    " in wq %p\n", addrw->state, &addrw->a, &sctp_addr_waitq);
+	    " in wq %p\n", addrw->state, &addrw->a, &net->sctp.addr_waitq);
 
-	if (!timer_pending(&sctp_addr_wq_timer)) {
+	if (!timer_pending(&net->sctp.addr_wq_timer)) {
 		timeo_val = jiffies;
 		timeo_val += msecs_to_jiffies(SCTP_ADDRESS_TICK_DELAY);
-		mod_timer(&sctp_addr_wq_timer, timeo_val);
+		mod_timer(&net->sctp.addr_wq_timer, timeo_val);
 	}
-	spin_unlock_bh(&sctp_addr_wq_lock);
+	spin_unlock_bh(&net->sctp.addr_wq_lock);
 }
 
 /* Event handler for inet address addition/deletion events.
@@ -776,11 +778,9 @@ static int sctp_inetaddr_event(struct notifier_block *this, unsigned long ev,
 	struct in_ifaddr *ifa = (struct in_ifaddr *)ptr;
 	struct sctp_sockaddr_entry *addr = NULL;
 	struct sctp_sockaddr_entry *temp;
+	struct net *net = dev_net(ifa->ifa_dev->dev);
 	int found = 0;
 
-	if (!net_eq(dev_net(ifa->ifa_dev->dev), &init_net))
-		return NOTIFY_DONE;
-
 	switch (ev) {
 	case NETDEV_UP:
 		addr = kmalloc(sizeof(struct sctp_sockaddr_entry), GFP_ATOMIC);
@@ -789,27 +789,27 @@ static int sctp_inetaddr_event(struct notifier_block *this, unsigned long ev,
 			addr->a.v4.sin_port = 0;
 			addr->a.v4.sin_addr.s_addr = ifa->ifa_local;
 			addr->valid = 1;
-			spin_lock_bh(&sctp_local_addr_lock);
-			list_add_tail_rcu(&addr->list, &sctp_local_addr_list);
-			sctp_addr_wq_mgmt(addr, SCTP_ADDR_NEW);
-			spin_unlock_bh(&sctp_local_addr_lock);
+			spin_lock_bh(&net->sctp.local_addr_lock);
+			list_add_tail_rcu(&addr->list, &net->sctp.local_addr_list);
+			sctp_addr_wq_mgmt(net, addr, SCTP_ADDR_NEW);
+			spin_unlock_bh(&net->sctp.local_addr_lock);
 		}
 		break;
 	case NETDEV_DOWN:
-		spin_lock_bh(&sctp_local_addr_lock);
+		spin_lock_bh(&net->sctp.local_addr_lock);
 		list_for_each_entry_safe(addr, temp,
-					&sctp_local_addr_list, list) {
+					&net->sctp.local_addr_list, list) {
 			if (addr->a.sa.sa_family == AF_INET &&
 					addr->a.v4.sin_addr.s_addr ==
 					ifa->ifa_local) {
-				sctp_addr_wq_mgmt(addr, SCTP_ADDR_DEL);
+				sctp_addr_wq_mgmt(net, addr, SCTP_ADDR_DEL);
 				found = 1;
 				addr->valid = 0;
 				list_del_rcu(&addr->list);
 				break;
 			}
 		}
-		spin_unlock_bh(&sctp_local_addr_lock);
+		spin_unlock_bh(&net->sctp.local_addr_lock);
 		if (found)
 			kfree_rcu(addr, rcu);
 		break;
@@ -1194,6 +1194,36 @@ static void sctp_v4_del_protocol(void)
 	unregister_inetaddr_notifier(&sctp_inetaddr_notifier);
 }
 
+static int sctp_net_init(struct net *net)
+{
+	/* Initialize the local address list. */
+	INIT_LIST_HEAD(&net->sctp.local_addr_list);
+	spin_lock_init(&net->sctp.local_addr_lock);
+	sctp_get_local_addr_list(net);
+
+	/* Initialize the address event list */
+	INIT_LIST_HEAD(&net->sctp.addr_waitq);
+	INIT_LIST_HEAD(&net->sctp.auto_asconf_splist);
+	spin_lock_init(&net->sctp.addr_wq_lock);
+	net->sctp.addr_wq_timer.expires = 0;
+	setup_timer(&net->sctp.addr_wq_timer, sctp_addr_wq_timeout_handler,
+		    (unsigned long)net);
+
+	return 0;
+}
+
+static void sctp_net_exit(struct net *net)
+{
+	/* Free the local address list */
+	sctp_free_addr_wq(net);
+	sctp_free_local_addr_list(net);
+}
+
+static struct pernet_operations sctp_net_ops = {
+	.init = sctp_net_init,
+	.exit = sctp_net_exit,
+};
+
 /* Initialize the universe into something sensible.  */
 SCTP_STATIC __init int sctp_init(void)
 {
@@ -1399,18 +1429,6 @@ SCTP_STATIC __init int sctp_init(void)
 	sctp_v4_pf_init();
 	sctp_v6_pf_init();
 
-	/* Initialize the local address list. */
-	INIT_LIST_HEAD(&sctp_local_addr_list);
-	spin_lock_init(&sctp_local_addr_lock);
-	sctp_get_local_addr_list();
-
-	/* Initialize the address event list */
-	INIT_LIST_HEAD(&sctp_addr_waitq);
-	INIT_LIST_HEAD(&sctp_auto_asconf_splist);
-	spin_lock_init(&sctp_addr_wq_lock);
-	sctp_addr_wq_timer.expires = 0;
-	setup_timer(&sctp_addr_wq_timer, sctp_addr_wq_timeout_handler, 0);
-
 	status = sctp_v4_protosw_init();
 
 	if (status)
@@ -1426,6 +1444,10 @@ SCTP_STATIC __init int sctp_init(void)
 		goto err_ctl_sock_init;
 	}
 
+	status = register_pernet_subsys(&sctp_net_ops);
+	if (status)
+		goto err_register_pernet_subsys;
+
 	status = sctp_v4_add_protocol();
 	if (status)
 		goto err_add_protocol;
@@ -1441,13 +1463,14 @@ out:
 err_v6_add_protocol:
 	sctp_v4_del_protocol();
 err_add_protocol:
+	unregister_pernet_subsys(&sctp_net_ops);
+err_register_pernet_subsys:
 	inet_ctl_sock_destroy(sctp_ctl_sock);
 err_ctl_sock_init:
 	sctp_v6_protosw_exit();
 err_v6_protosw_init:
 	sctp_v4_protosw_exit();
 err_protosw_init:
-	sctp_free_local_addr_list();
 	sctp_v4_pf_exit();
 	sctp_v6_pf_exit();
 	sctp_sysctl_unregister();
@@ -1482,18 +1505,16 @@ SCTP_STATIC __exit void sctp_exit(void)
 	/* Unregister with inet6/inet layers. */
 	sctp_v6_del_protocol();
 	sctp_v4_del_protocol();
-	sctp_free_addr_wq();
 
 	/* Free the control endpoint.  */
 	inet_ctl_sock_destroy(sctp_ctl_sock);
 
+	unregister_pernet_subsys(&sctp_net_ops);
+
 	/* Free protosw registrations */
 	sctp_v6_protosw_exit();
 	sctp_v4_protosw_exit();
 
-	/* Free the local address list.  */
-	sctp_free_local_addr_list();
-
 	/* Unregister with socket layer. */
 	sctp_v6_pf_exit();
 	sctp_v4_pf_exit();
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 4316b0f988d4..5b6dd0e3d1f6 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -3471,7 +3471,7 @@ static int sctp_setsockopt_auto_asconf(struct sock *sk, char __user *optval,
 		sp->do_auto_asconf = 0;
 	} else if (val && !sp->do_auto_asconf) {
 		list_add_tail(&sp->auto_asconf_list,
-		    &sctp_auto_asconf_splist);
+		    &sock_net(sk)->sctp.auto_asconf_splist);
 		sp->do_auto_asconf = 1;
 	}
 	return 0;
@@ -3964,7 +3964,7 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk)
 	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
 	if (sctp_default_auto_asconf) {
 		list_add_tail(&sp->auto_asconf_list,
-		    &sctp_auto_asconf_splist);
+		    &sock_net(sk)->sctp.auto_asconf_splist);
 		sp->do_auto_asconf = 1;
 	} else
 		sp->do_auto_asconf = 0;
@@ -4653,9 +4653,10 @@ static int sctp_copy_laddrs(struct sock *sk, __u16 port, void *to,
 	union sctp_addr temp;
 	int cnt = 0;
 	int addrlen;
+	struct net *net = sock_net(sk);
 
 	rcu_read_lock();
-	list_for_each_entry_rcu(addr, &sctp_local_addr_list, list) {
+	list_for_each_entry_rcu(addr, &net->sctp.local_addr_list, list) {
 		if (!addr->valid)
 			continue;
 
-- 
cgit v1.2.3


From 2ce955035081112cf1590c961da8d94324142b5e Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Mon, 6 Aug 2012 08:43:06 +0000
Subject: sctp: Make the ctl_sock per network namespace

- Kill sctp_get_ctl_sock, it is useless now.
- Pass struct net where needed so net->sctp.ctl_sock is accessible.

Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
Acked-by: Vlad Yasevich <vyasevich@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netns/sctp.h |  8 ++++++++
 include/net/sctp/sctp.h  |  1 -
 net/sctp/input.c         |  4 ++--
 net/sctp/protocol.c      | 47 +++++++++++++++++++----------------------------
 net/sctp/sm_statefuns.c  | 45 +++++++++++++++++++++++++++++++--------------
 5 files changed, 60 insertions(+), 45 deletions(-)

(limited to 'include')

diff --git a/include/net/netns/sctp.h b/include/net/netns/sctp.h
index cbd684e01cf7..29e36b4b4feb 100644
--- a/include/net/netns/sctp.h
+++ b/include/net/netns/sctp.h
@@ -1,7 +1,15 @@
 #ifndef __NETNS_SCTP_H__
 #define __NETNS_SCTP_H__
 
+struct sock;
+
 struct netns_sctp {
+	/* This is the global socket data structure used for responding to
+	 * the Out-of-the-blue (OOTB) packets.  A control sock will be created
+	 * for this socket at the initialization time.
+	 */
+	struct sock *ctl_sock;
+
 	/* This is the global local address list.
 	 * We actively maintain this complete list of addresses on
 	 * the system by catching address add/delete events.
diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
index 00c920537d4a..550a81bbfc71 100644
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@ -114,7 +114,6 @@
 /*
  * sctp/protocol.c
  */
-extern struct sock *sctp_get_ctl_sock(void);
 extern int sctp_copy_local_addr_list(struct net *, struct sctp_bind_addr *,
 				     sctp_scope_t, gfp_t gfp,
 				     int flags);
diff --git a/net/sctp/input.c b/net/sctp/input.c
index a7e9a85b5acb..c9a0449bde53 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -204,7 +204,7 @@ int sctp_rcv(struct sk_buff *skb)
 			sctp_endpoint_put(ep);
 			ep = NULL;
 		}
-		sk = sctp_get_ctl_sock();
+		sk = net->sctp.ctl_sock;
 		ep = sctp_sk(sk)->ep;
 		sctp_endpoint_hold(ep);
 		rcvr = &ep->base;
@@ -795,7 +795,7 @@ static struct sctp_endpoint *__sctp_rcv_lookup_endpoint(struct net *net,
 			goto hit;
 	}
 
-	ep = sctp_sk((sctp_get_ctl_sock()))->ep;
+	ep = sctp_sk(net->sctp.ctl_sock)->ep;
 
 hit:
 	sctp_endpoint_hold(ep);
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 7025d96bae5f..f20bd708e89c 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -78,12 +78,6 @@ struct proc_dir_entry	*proc_net_sctp;
 struct idr sctp_assocs_id;
 DEFINE_SPINLOCK(sctp_assocs_id_lock);
 
-/* This is the global socket data structure used for responding to
- * the Out-of-the-blue (OOTB) packets.  A control sock will be created
- * for this socket at the initialization time.
- */
-static struct sock *sctp_ctl_sock;
-
 static struct sctp_pf *sctp_pf_inet6_specific;
 static struct sctp_pf *sctp_pf_inet_specific;
 static struct sctp_af *sctp_af_v4_specific;
@@ -96,12 +90,6 @@ long sysctl_sctp_mem[3];
 int sysctl_sctp_rmem[3];
 int sysctl_sctp_wmem[3];
 
-/* Return the address of the control sock. */
-struct sock *sctp_get_ctl_sock(void)
-{
-	return sctp_ctl_sock;
-}
-
 /* Set up the proc fs entry for the SCTP protocol. */
 static __init int sctp_proc_init(void)
 {
@@ -822,7 +810,7 @@ static int sctp_inetaddr_event(struct notifier_block *this, unsigned long ev,
  * Initialize the control inode/socket with a control endpoint data
  * structure.  This endpoint is reserved exclusively for the OOTB processing.
  */
-static int sctp_ctl_sock_init(void)
+static int sctp_ctl_sock_init(struct net *net)
 {
 	int err;
 	sa_family_t family = PF_INET;
@@ -830,14 +818,14 @@ static int sctp_ctl_sock_init(void)
 	if (sctp_get_pf_specific(PF_INET6))
 		family = PF_INET6;
 
-	err = inet_ctl_sock_create(&sctp_ctl_sock, family,
-				   SOCK_SEQPACKET, IPPROTO_SCTP, &init_net);
+	err = inet_ctl_sock_create(&net->sctp.ctl_sock, family,
+				   SOCK_SEQPACKET, IPPROTO_SCTP, net);
 
 	/* If IPv6 socket could not be created, try the IPv4 socket */
 	if (err < 0 && family == PF_INET6)
-		err = inet_ctl_sock_create(&sctp_ctl_sock, AF_INET,
+		err = inet_ctl_sock_create(&net->sctp.ctl_sock, AF_INET,
 					   SOCK_SEQPACKET, IPPROTO_SCTP,
-					   &init_net);
+					   net);
 
 	if (err < 0) {
 		pr_err("Failed to create the SCTP control socket\n");
@@ -1196,6 +1184,14 @@ static void sctp_v4_del_protocol(void)
 
 static int sctp_net_init(struct net *net)
 {
+	int status;
+
+	/* Initialize the control inode/socket for handling OOTB packets.  */
+	if ((status = sctp_ctl_sock_init(net))) {
+		pr_err("Failed to initialize the SCTP control sock\n");
+		goto err_ctl_sock_init;
+	}
+
 	/* Initialize the local address list. */
 	INIT_LIST_HEAD(&net->sctp.local_addr_list);
 	spin_lock_init(&net->sctp.local_addr_lock);
@@ -1210,6 +1206,9 @@ static int sctp_net_init(struct net *net)
 		    (unsigned long)net);
 
 	return 0;
+
+err_ctl_sock_init:
+	return status;
 }
 
 static void sctp_net_exit(struct net *net)
@@ -1217,6 +1216,9 @@ static void sctp_net_exit(struct net *net)
 	/* Free the local address list */
 	sctp_free_addr_wq(net);
 	sctp_free_local_addr_list(net);
+
+	/* Free the control endpoint.  */
+	inet_ctl_sock_destroy(net->sctp.ctl_sock);
 }
 
 static struct pernet_operations sctp_net_ops = {
@@ -1438,12 +1440,6 @@ SCTP_STATIC __init int sctp_init(void)
 	if (status)
 		goto err_v6_protosw_init;
 
-	/* Initialize the control inode/socket for handling OOTB packets.  */
-	if ((status = sctp_ctl_sock_init())) {
-		pr_err("Failed to initialize the SCTP control sock\n");
-		goto err_ctl_sock_init;
-	}
-
 	status = register_pernet_subsys(&sctp_net_ops);
 	if (status)
 		goto err_register_pernet_subsys;
@@ -1465,8 +1461,6 @@ err_v6_add_protocol:
 err_add_protocol:
 	unregister_pernet_subsys(&sctp_net_ops);
 err_register_pernet_subsys:
-	inet_ctl_sock_destroy(sctp_ctl_sock);
-err_ctl_sock_init:
 	sctp_v6_protosw_exit();
 err_v6_protosw_init:
 	sctp_v4_protosw_exit();
@@ -1506,9 +1500,6 @@ SCTP_STATIC __exit void sctp_exit(void)
 	sctp_v6_del_protocol();
 	sctp_v4_del_protocol();
 
-	/* Free the control endpoint.  */
-	inet_ctl_sock_destroy(sctp_ctl_sock);
-
 	unregister_pernet_subsys(&sctp_net_ops);
 
 	/* Free protosw registrations */
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index 9fca10357350..f2daf615e8fe 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -74,7 +74,8 @@ static struct sctp_packet *sctp_abort_pkt_new(const struct sctp_endpoint *ep,
 static int sctp_eat_data(const struct sctp_association *asoc,
 			 struct sctp_chunk *chunk,
 			 sctp_cmd_seq_t *commands);
-static struct sctp_packet *sctp_ootb_pkt_new(const struct sctp_association *asoc,
+static struct sctp_packet *sctp_ootb_pkt_new(struct net *net,
+					     const struct sctp_association *asoc,
 					     const struct sctp_chunk *chunk);
 static void sctp_send_stale_cookie_err(const struct sctp_endpoint *ep,
 				       const struct sctp_association *asoc,
@@ -301,6 +302,7 @@ sctp_disposition_t sctp_sf_do_5_1B_init(const struct sctp_endpoint *ep,
 	struct sctp_chunk *err_chunk;
 	struct sctp_packet *packet;
 	sctp_unrecognized_param_t *unk_param;
+	struct net *net;
 	int len;
 
 	/* 6.10 Bundling
@@ -318,7 +320,8 @@ sctp_disposition_t sctp_sf_do_5_1B_init(const struct sctp_endpoint *ep,
 	/* If the packet is an OOTB packet which is temporarily on the
 	 * control endpoint, respond with an ABORT.
 	 */
-	if (ep == sctp_sk((sctp_get_ctl_sock()))->ep) {
+	net = sock_net(ep->base.sk);
+	if (ep == sctp_sk(net->sctp.ctl_sock)->ep) {
 		SCTP_INC_STATS(SCTP_MIB_OUTOFBLUES);
 		return sctp_sf_tabort_8_4_8(ep, asoc, type, arg, commands);
 	}
@@ -646,11 +649,13 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(const struct sctp_endpoint *ep,
 	int error = 0;
 	struct sctp_chunk *err_chk_p;
 	struct sock *sk;
+	struct net *net;
 
 	/* If the packet is an OOTB packet which is temporarily on the
 	 * control endpoint, respond with an ABORT.
 	 */
-	if (ep == sctp_sk((sctp_get_ctl_sock()))->ep) {
+	net = sock_net(ep->base.sk);
+	if (ep == sctp_sk(net->sctp.ctl_sock)->ep) {
 		SCTP_INC_STATS(SCTP_MIB_OUTOFBLUES);
 		return sctp_sf_tabort_8_4_8(ep, asoc, type, arg, commands);
 	}
@@ -1171,7 +1176,7 @@ sctp_disposition_t sctp_sf_backbeat_8_3(const struct sctp_endpoint *ep,
 /* Helper function to send out an abort for the restart
  * condition.
  */
-static int sctp_sf_send_restart_abort(union sctp_addr *ssa,
+static int sctp_sf_send_restart_abort(struct net *net, union sctp_addr *ssa,
 				      struct sctp_chunk *init,
 				      sctp_cmd_seq_t *commands)
 {
@@ -1197,7 +1202,7 @@ static int sctp_sf_send_restart_abort(union sctp_addr *ssa,
 	errhdr->length = htons(len);
 
 	/* Assign to the control socket. */
-	ep = sctp_sk((sctp_get_ctl_sock()))->ep;
+	ep = sctp_sk(net->sctp.ctl_sock)->ep;
 
 	/* Association is NULL since this may be a restart attack and we
 	 * want to send back the attacker's vtag.
@@ -1240,6 +1245,7 @@ static int sctp_sf_check_restart_addrs(const struct sctp_association *new_asoc,
 				       struct sctp_chunk *init,
 				       sctp_cmd_seq_t *commands)
 {
+	struct net *net = sock_net(new_asoc->base.sk);
 	struct sctp_transport *new_addr;
 	int ret = 1;
 
@@ -1258,7 +1264,7 @@ static int sctp_sf_check_restart_addrs(const struct sctp_association *new_asoc,
 			    transports) {
 		if (!list_has_sctp_addr(&asoc->peer.transport_addr_list,
 					&new_addr->ipaddr)) {
-			sctp_sf_send_restart_abort(&new_addr->ipaddr, init,
+			sctp_sf_send_restart_abort(net, &new_addr->ipaddr, init,
 						   commands);
 			ret = 0;
 			break;
@@ -1650,10 +1656,11 @@ sctp_disposition_t sctp_sf_do_5_2_3_initack(const struct sctp_endpoint *ep,
 					    const sctp_subtype_t type,
 					    void *arg, sctp_cmd_seq_t *commands)
 {
+	struct net *net = sock_net(ep->base.sk);
 	/* Per the above section, we'll discard the chunk if we have an
 	 * endpoint.  If this is an OOTB INIT-ACK, treat it as such.
 	 */
-	if (ep == sctp_sk((sctp_get_ctl_sock()))->ep)
+	if (ep == sctp_sk(net->sctp.ctl_sock)->ep)
 		return sctp_sf_ootb(ep, asoc, type, arg, commands);
 	else
 		return sctp_sf_discard_chunk(ep, asoc, type, arg, commands);
@@ -3163,8 +3170,10 @@ static sctp_disposition_t sctp_sf_tabort_8_4_8(const struct sctp_endpoint *ep,
 	struct sctp_packet *packet = NULL;
 	struct sctp_chunk *chunk = arg;
 	struct sctp_chunk *abort;
+	struct net *net;
 
-	packet = sctp_ootb_pkt_new(asoc, chunk);
+	net = sock_net(ep->base.sk);
+	packet = sctp_ootb_pkt_new(net, asoc, chunk);
 
 	if (packet) {
 		/* Make an ABORT. The T bit will be set if the asoc
@@ -3425,8 +3434,10 @@ static sctp_disposition_t sctp_sf_shut_8_4_5(const struct sctp_endpoint *ep,
 	struct sctp_packet *packet = NULL;
 	struct sctp_chunk *chunk = arg;
 	struct sctp_chunk *shut;
+	struct net *net;
 
-	packet = sctp_ootb_pkt_new(asoc, chunk);
+	net = sock_net(ep->base.sk);
+	packet = sctp_ootb_pkt_new(net, asoc, chunk);
 
 	if (packet) {
 		/* Make an SHUTDOWN_COMPLETE.
@@ -4262,6 +4273,7 @@ static sctp_disposition_t sctp_sf_abort_violation(
 	struct sctp_packet *packet = NULL;
 	struct sctp_chunk *chunk =  arg;
 	struct sctp_chunk *abort = NULL;
+	struct net *net;
 
 	/* SCTP-AUTH, Section 6.3:
 	 *    It should be noted that if the receiver wants to tear
@@ -4282,6 +4294,7 @@ static sctp_disposition_t sctp_sf_abort_violation(
 	if (!abort)
 		goto nomem;
 
+	net = sock_net(ep->base.sk);
 	if (asoc) {
 		/* Treat INIT-ACK as a special case during COOKIE-WAIT. */
 		if (chunk->chunk_hdr->type == SCTP_CID_INIT_ACK &&
@@ -4319,7 +4332,7 @@ static sctp_disposition_t sctp_sf_abort_violation(
 			SCTP_DEC_STATS(SCTP_MIB_CURRESTAB);
 		}
 	} else {
-		packet = sctp_ootb_pkt_new(asoc, chunk);
+		packet = sctp_ootb_pkt_new(net, asoc, chunk);
 
 		if (!packet)
 			goto nomem_pkt;
@@ -5825,8 +5838,10 @@ static struct sctp_packet *sctp_abort_pkt_new(const struct sctp_endpoint *ep,
 {
 	struct sctp_packet *packet;
 	struct sctp_chunk *abort;
+	struct net *net;
 
-	packet = sctp_ootb_pkt_new(asoc, chunk);
+	net = sock_net(ep->base.sk);
+	packet = sctp_ootb_pkt_new(net, asoc, chunk);
 
 	if (packet) {
 		/* Make an ABORT.
@@ -5858,7 +5873,8 @@ static struct sctp_packet *sctp_abort_pkt_new(const struct sctp_endpoint *ep,
 }
 
 /* Allocate a packet for responding in the OOTB conditions.  */
-static struct sctp_packet *sctp_ootb_pkt_new(const struct sctp_association *asoc,
+static struct sctp_packet *sctp_ootb_pkt_new(struct net *net,
+					     const struct sctp_association *asoc,
 					     const struct sctp_chunk *chunk)
 {
 	struct sctp_packet *packet;
@@ -5919,7 +5935,7 @@ static struct sctp_packet *sctp_ootb_pkt_new(const struct sctp_association *asoc
 	 * the source address.
 	 */
 	sctp_transport_route(transport, (union sctp_addr *)&chunk->dest,
-			     sctp_sk(sctp_get_ctl_sock()));
+			     sctp_sk(net->sctp.ctl_sock));
 
 	packet = sctp_packet_init(&transport->packet, transport, sport, dport);
 	packet = sctp_packet_config(packet, vtag, 0);
@@ -5946,7 +5962,8 @@ static void sctp_send_stale_cookie_err(const struct sctp_endpoint *ep,
 	struct sctp_packet *packet;
 
 	if (err_chunk) {
-		packet = sctp_ootb_pkt_new(asoc, chunk);
+		struct net *net = sock_net(ep->base.sk);
+		packet = sctp_ootb_pkt_new(net, asoc, chunk);
 		if (packet) {
 			struct sctp_signed_cookie *cookie;
 
-- 
cgit v1.2.3


From 13d782f6b4fbbaf9d0380a9947deb45a9de46ae7 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Mon, 6 Aug 2012 08:45:15 +0000
Subject: sctp: Make the proc files per network namespace.

- Convert all of the files under /proc/net/sctp to be per
  network namespace.

- Don't print anything for /proc/net/sctp/snmp except in
  the initial network namespaces as the snmp counters still
  have to be converted to be per network namespace.

Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
Acked-by: Vlad Yasevich <vyasevich@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netns/sctp.h |  5 +++
 include/net/sctp/sctp.h  | 25 +++++++-------
 net/sctp/objcnt.c        |  8 ++---
 net/sctp/proc.c          | 59 +++++++++++++++++++++-----------
 net/sctp/protocol.c      | 87 +++++++++++++++++++++---------------------------
 5 files changed, 98 insertions(+), 86 deletions(-)

(limited to 'include')

diff --git a/include/net/netns/sctp.h b/include/net/netns/sctp.h
index 29e36b4b4feb..9c20a82a77ec 100644
--- a/include/net/netns/sctp.h
+++ b/include/net/netns/sctp.h
@@ -2,8 +2,13 @@
 #define __NETNS_SCTP_H__
 
 struct sock;
+struct proc_dir_entry;
 
 struct netns_sctp {
+#ifdef CONFIG_PROC_FS
+	struct proc_dir_entry *proc_net_sctp;
+#endif
+
 	/* This is the global socket data structure used for responding to
 	 * the Out-of-the-blue (OOTB) packets.  A control sock will be created
 	 * for this socket at the initialization time.
diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
index 550a81bbfc71..b49588a51d80 100644
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@ -172,14 +172,14 @@ void sctp_backlog_migrate(struct sctp_association *assoc,
 /*
  * sctp/proc.c
  */
-int sctp_snmp_proc_init(void);
-void sctp_snmp_proc_exit(void);
-int sctp_eps_proc_init(void);
-void sctp_eps_proc_exit(void);
-int sctp_assocs_proc_init(void);
-void sctp_assocs_proc_exit(void);
-int sctp_remaddr_proc_init(void);
-void sctp_remaddr_proc_exit(void);
+int sctp_snmp_proc_init(struct net *net);
+void sctp_snmp_proc_exit(struct net *net);
+int sctp_eps_proc_init(struct net *net);
+void sctp_eps_proc_exit(struct net *net);
+int sctp_assocs_proc_init(struct net *net);
+void sctp_assocs_proc_exit(struct net *net);
+int sctp_remaddr_proc_init(struct net *net);
+void sctp_remaddr_proc_exit(struct net *net);
 
 
 /*
@@ -360,16 +360,16 @@ atomic_t sctp_dbg_objcnt_## name = ATOMIC_INIT(0)
 #define SCTP_DBG_OBJCNT_ENTRY(name) \
 {.label= #name, .counter= &sctp_dbg_objcnt_## name}
 
-void sctp_dbg_objcnt_init(void);
-void sctp_dbg_objcnt_exit(void);
+void sctp_dbg_objcnt_init(struct net *);
+void sctp_dbg_objcnt_exit(struct net *);
 
 #else
 
 #define SCTP_DBG_OBJCNT_INC(name)
 #define SCTP_DBG_OBJCNT_DEC(name)
 
-static inline void sctp_dbg_objcnt_init(void) { return; }
-static inline void sctp_dbg_objcnt_exit(void) { return; }
+static inline void sctp_dbg_objcnt_init(struct net *) { return; }
+static inline void sctp_dbg_objcnt_exit(struct net *) { return; }
 
 #endif /* CONFIG_SCTP_DBG_OBJCOUNT */
 
@@ -585,7 +585,6 @@ for (pos = chunk->subh.fwdtsn_hdr->skip;\
 
 extern struct proto sctp_prot;
 extern struct proto sctpv6_prot;
-extern struct proc_dir_entry *proc_net_sctp;
 void sctp_put_port(struct sock *sk);
 
 extern struct idr sctp_assocs_id;
diff --git a/net/sctp/objcnt.c b/net/sctp/objcnt.c
index 8ef8e7d9eb61..fe012c44f8df 100644
--- a/net/sctp/objcnt.c
+++ b/net/sctp/objcnt.c
@@ -129,20 +129,20 @@ static const struct file_operations sctp_objcnt_ops = {
 };
 
 /* Initialize the objcount in the proc filesystem.  */
-void sctp_dbg_objcnt_init(void)
+void sctp_dbg_objcnt_init(struct net *net)
 {
 	struct proc_dir_entry *ent;
 
 	ent = proc_create("sctp_dbg_objcnt", 0,
-			  proc_net_sctp, &sctp_objcnt_ops);
+			  net->sctp.proc_net_sctp, &sctp_objcnt_ops);
 	if (!ent)
 		pr_warn("sctp_dbg_objcnt: Unable to create /proc entry.\n");
 }
 
 /* Cleanup the objcount entry in the proc filesystem.  */
-void sctp_dbg_objcnt_exit(void)
+void sctp_dbg_objcnt_exit(struct net *net)
 {
-	remove_proc_entry("sctp_dbg_objcnt", proc_net_sctp);
+	remove_proc_entry("sctp_dbg_objcnt", net->sctp.proc_net_sctp);
 }
 
 
diff --git a/net/sctp/proc.c b/net/sctp/proc.c
index 1e2eee88c3ea..dc79a3aa0382 100644
--- a/net/sctp/proc.c
+++ b/net/sctp/proc.c
@@ -80,8 +80,12 @@ static const struct snmp_mib sctp_snmp_list[] = {
 /* Display sctp snmp mib statistics(/proc/net/sctp/snmp). */
 static int sctp_snmp_seq_show(struct seq_file *seq, void *v)
 {
+	struct net *net = seq->private;
 	int i;
 
+	if (!net_eq(net, &init_net))
+		return 0;
+
 	for (i = 0; sctp_snmp_list[i].name != NULL; i++)
 		seq_printf(seq, "%-32s\t%ld\n", sctp_snmp_list[i].name,
 			   snmp_fold_field((void __percpu **)sctp_statistics,
@@ -93,7 +97,7 @@ static int sctp_snmp_seq_show(struct seq_file *seq, void *v)
 /* Initialize the seq file operations for 'snmp' object. */
 static int sctp_snmp_seq_open(struct inode *inode, struct file *file)
 {
-	return single_open(file, sctp_snmp_seq_show, NULL);
+	return single_open_net(inode, file, sctp_snmp_seq_show);
 }
 
 static const struct file_operations sctp_snmp_seq_fops = {
@@ -105,11 +109,12 @@ static const struct file_operations sctp_snmp_seq_fops = {
 };
 
 /* Set up the proc fs entry for 'snmp' object. */
-int __init sctp_snmp_proc_init(void)
+int __net_init sctp_snmp_proc_init(struct net *net)
 {
 	struct proc_dir_entry *p;
 
-	p = proc_create("snmp", S_IRUGO, proc_net_sctp, &sctp_snmp_seq_fops);
+	p = proc_create("snmp", S_IRUGO, net->sctp.proc_net_sctp,
+			&sctp_snmp_seq_fops);
 	if (!p)
 		return -ENOMEM;
 
@@ -117,9 +122,9 @@ int __init sctp_snmp_proc_init(void)
 }
 
 /* Cleanup the proc fs entry for 'snmp' object. */
-void sctp_snmp_proc_exit(void)
+void sctp_snmp_proc_exit(struct net *net)
 {
-	remove_proc_entry("snmp", proc_net_sctp);
+	remove_proc_entry("snmp", net->sctp.proc_net_sctp);
 }
 
 /* Dump local addresses of an association/endpoint. */
@@ -197,6 +202,7 @@ static void * sctp_eps_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 /* Display sctp endpoints (/proc/net/sctp/eps). */
 static int sctp_eps_seq_show(struct seq_file *seq, void *v)
 {
+	struct seq_net_private *priv = seq->private;
 	struct sctp_hashbucket *head;
 	struct sctp_ep_common *epb;
 	struct sctp_endpoint *ep;
@@ -213,6 +219,8 @@ static int sctp_eps_seq_show(struct seq_file *seq, void *v)
 	sctp_for_each_hentry(epb, node, &head->chain) {
 		ep = sctp_ep(epb);
 		sk = epb->sk;
+		if (!net_eq(sock_net(sk), priv->net))
+			continue;
 		seq_printf(seq, "%8pK %8pK %-3d %-3d %-4d %-5d %5d %5lu ", ep, sk,
 			   sctp_sk(sk)->type, sk->sk_state, hash,
 			   epb->bind_addr.port,
@@ -238,7 +246,8 @@ static const struct seq_operations sctp_eps_ops = {
 /* Initialize the seq file operations for 'eps' object. */
 static int sctp_eps_seq_open(struct inode *inode, struct file *file)
 {
-	return seq_open(file, &sctp_eps_ops);
+	return seq_open_net(inode, file, &sctp_eps_ops,
+			    sizeof(struct seq_net_private));
 }
 
 static const struct file_operations sctp_eps_seq_fops = {
@@ -249,11 +258,12 @@ static const struct file_operations sctp_eps_seq_fops = {
 };
 
 /* Set up the proc fs entry for 'eps' object. */
-int __init sctp_eps_proc_init(void)
+int __net_init sctp_eps_proc_init(struct net *net)
 {
 	struct proc_dir_entry *p;
 
-	p = proc_create("eps", S_IRUGO, proc_net_sctp, &sctp_eps_seq_fops);
+	p = proc_create("eps", S_IRUGO, net->sctp.proc_net_sctp,
+			&sctp_eps_seq_fops);
 	if (!p)
 		return -ENOMEM;
 
@@ -261,9 +271,9 @@ int __init sctp_eps_proc_init(void)
 }
 
 /* Cleanup the proc fs entry for 'eps' object. */
-void sctp_eps_proc_exit(void)
+void sctp_eps_proc_exit(struct net *net)
 {
-	remove_proc_entry("eps", proc_net_sctp);
+	remove_proc_entry("eps", net->sctp.proc_net_sctp);
 }
 
 
@@ -300,6 +310,7 @@ static void * sctp_assocs_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 /* Display sctp associations (/proc/net/sctp/assocs). */
 static int sctp_assocs_seq_show(struct seq_file *seq, void *v)
 {
+	struct seq_net_private *priv = seq->private;
 	struct sctp_hashbucket *head;
 	struct sctp_ep_common *epb;
 	struct sctp_association *assoc;
@@ -316,6 +327,8 @@ static int sctp_assocs_seq_show(struct seq_file *seq, void *v)
 	sctp_for_each_hentry(epb, node, &head->chain) {
 		assoc = sctp_assoc(epb);
 		sk = epb->sk;
+		if (!net_eq(sock_net(sk), priv->net))
+			continue;
 		seq_printf(seq,
 			   "%8pK %8pK %-3d %-3d %-2d %-4d "
 			   "%4d %8d %8d %7d %5lu %-5d %5d ",
@@ -354,7 +367,8 @@ static const struct seq_operations sctp_assoc_ops = {
 /* Initialize the seq file operations for 'assocs' object. */
 static int sctp_assocs_seq_open(struct inode *inode, struct file *file)
 {
-	return seq_open(file, &sctp_assoc_ops);
+	return seq_open_net(inode, file, &sctp_assoc_ops,
+			    sizeof(struct seq_net_private));
 }
 
 static const struct file_operations sctp_assocs_seq_fops = {
@@ -365,11 +379,11 @@ static const struct file_operations sctp_assocs_seq_fops = {
 };
 
 /* Set up the proc fs entry for 'assocs' object. */
-int __init sctp_assocs_proc_init(void)
+int __net_init sctp_assocs_proc_init(struct net *net)
 {
 	struct proc_dir_entry *p;
 
-	p = proc_create("assocs", S_IRUGO, proc_net_sctp,
+	p = proc_create("assocs", S_IRUGO, net->sctp.proc_net_sctp,
 			&sctp_assocs_seq_fops);
 	if (!p)
 		return -ENOMEM;
@@ -378,9 +392,9 @@ int __init sctp_assocs_proc_init(void)
 }
 
 /* Cleanup the proc fs entry for 'assocs' object. */
-void sctp_assocs_proc_exit(void)
+void sctp_assocs_proc_exit(struct net *net)
 {
-	remove_proc_entry("assocs", proc_net_sctp);
+	remove_proc_entry("assocs", net->sctp.proc_net_sctp);
 }
 
 static void *sctp_remaddr_seq_start(struct seq_file *seq, loff_t *pos)
@@ -412,6 +426,7 @@ static void sctp_remaddr_seq_stop(struct seq_file *seq, void *v)
 
 static int sctp_remaddr_seq_show(struct seq_file *seq, void *v)
 {
+	struct seq_net_private *priv = seq->private;
 	struct sctp_hashbucket *head;
 	struct sctp_ep_common *epb;
 	struct sctp_association *assoc;
@@ -426,6 +441,8 @@ static int sctp_remaddr_seq_show(struct seq_file *seq, void *v)
 	sctp_local_bh_disable();
 	read_lock(&head->lock);
 	sctp_for_each_hentry(epb, node, &head->chain) {
+		if (!net_eq(sock_net(epb->sk), priv->net))
+			continue;
 		assoc = sctp_assoc(epb);
 		list_for_each_entry(tsp, &assoc->peer.transport_addr_list,
 					transports) {
@@ -489,14 +506,15 @@ static const struct seq_operations sctp_remaddr_ops = {
 };
 
 /* Cleanup the proc fs entry for 'remaddr' object. */
-void sctp_remaddr_proc_exit(void)
+void sctp_remaddr_proc_exit(struct net *net)
 {
-	remove_proc_entry("remaddr", proc_net_sctp);
+	remove_proc_entry("remaddr", net->sctp.proc_net_sctp);
 }
 
 static int sctp_remaddr_seq_open(struct inode *inode, struct file *file)
 {
-	return seq_open(file, &sctp_remaddr_ops);
+	return seq_open_net(inode, file, &sctp_remaddr_ops,
+			    sizeof(struct seq_net_private));
 }
 
 static const struct file_operations sctp_remaddr_seq_fops = {
@@ -506,11 +524,12 @@ static const struct file_operations sctp_remaddr_seq_fops = {
 	.release = seq_release,
 };
 
-int __init sctp_remaddr_proc_init(void)
+int __net_init sctp_remaddr_proc_init(struct net *net)
 {
 	struct proc_dir_entry *p;
 
-	p = proc_create("remaddr", S_IRUGO, proc_net_sctp, &sctp_remaddr_seq_fops);
+	p = proc_create("remaddr", S_IRUGO, net->sctp.proc_net_sctp,
+			&sctp_remaddr_seq_fops);
 	if (!p)
 		return -ENOMEM;
 	return 0;
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 48a5989c98ce..de7994edb4ca 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -71,10 +71,6 @@
 struct sctp_globals sctp_globals __read_mostly;
 DEFINE_SNMP_STAT(struct sctp_mib, sctp_statistics) __read_mostly;
 
-#ifdef CONFIG_PROC_FS
-struct proc_dir_entry	*proc_net_sctp;
-#endif
-
 struct idr sctp_assocs_id;
 DEFINE_SPINLOCK(sctp_assocs_id_lock);
 
@@ -91,60 +87,52 @@ int sysctl_sctp_rmem[3];
 int sysctl_sctp_wmem[3];
 
 /* Set up the proc fs entry for the SCTP protocol. */
-static __init int sctp_proc_init(void)
+static __net_init int sctp_proc_init(struct net *net)
 {
 #ifdef CONFIG_PROC_FS
-	if (!proc_net_sctp) {
-		proc_net_sctp = proc_mkdir("sctp", init_net.proc_net);
-		if (!proc_net_sctp)
-			goto out_free_percpu;
-	}
-
-	if (sctp_snmp_proc_init())
+	net->sctp.proc_net_sctp = proc_net_mkdir(net, "sctp", net->proc_net);
+	if (!net->sctp.proc_net_sctp)
+		goto out_proc_net_sctp;
+	if (sctp_snmp_proc_init(net))
 		goto out_snmp_proc_init;
-	if (sctp_eps_proc_init())
+	if (sctp_eps_proc_init(net))
 		goto out_eps_proc_init;
-	if (sctp_assocs_proc_init())
+	if (sctp_assocs_proc_init(net))
 		goto out_assocs_proc_init;
-	if (sctp_remaddr_proc_init())
+	if (sctp_remaddr_proc_init(net))
 		goto out_remaddr_proc_init;
 
 	return 0;
 
 out_remaddr_proc_init:
-	sctp_assocs_proc_exit();
+	sctp_assocs_proc_exit(net);
 out_assocs_proc_init:
-	sctp_eps_proc_exit();
+	sctp_eps_proc_exit(net);
 out_eps_proc_init:
-	sctp_snmp_proc_exit();
+	sctp_snmp_proc_exit(net);
 out_snmp_proc_init:
-	if (proc_net_sctp) {
-		proc_net_sctp = NULL;
-		remove_proc_entry("sctp", init_net.proc_net);
-	}
-out_free_percpu:
-#else
-	return 0;
-#endif /* CONFIG_PROC_FS */
+	remove_proc_entry("sctp", net->proc_net);
+	net->sctp.proc_net_sctp = NULL;
+out_proc_net_sctp:
 	return -ENOMEM;
+#endif /* CONFIG_PROC_FS */
+	return 0;
 }
 
 /* Clean up the proc fs entry for the SCTP protocol.
  * Note: Do not make this __exit as it is used in the init error
  * path.
  */
-static void sctp_proc_exit(void)
+static void sctp_proc_exit(struct net *net)
 {
 #ifdef CONFIG_PROC_FS
-	sctp_snmp_proc_exit();
-	sctp_eps_proc_exit();
-	sctp_assocs_proc_exit();
-	sctp_remaddr_proc_exit();
-
-	if (proc_net_sctp) {
-		proc_net_sctp = NULL;
-		remove_proc_entry("sctp", init_net.proc_net);
-	}
+	sctp_snmp_proc_exit(net);
+	sctp_eps_proc_exit(net);
+	sctp_assocs_proc_exit(net);
+	sctp_remaddr_proc_exit(net);
+
+	remove_proc_entry("sctp", net->proc_net);
+	net->sctp.proc_net_sctp = NULL;
 #endif
 }
 
@@ -1180,6 +1168,13 @@ static int sctp_net_init(struct net *net)
 {
 	int status;
 
+	/* Initialize proc fs directory.  */
+	status = sctp_proc_init(net);
+	if (status)
+		goto err_init_proc;
+
+	sctp_dbg_objcnt_init(net);
+
 	/* Initialize the control inode/socket for handling OOTB packets.  */
 	if ((status = sctp_ctl_sock_init(net))) {
 		pr_err("Failed to initialize the SCTP control sock\n");
@@ -1202,6 +1197,9 @@ static int sctp_net_init(struct net *net)
 	return 0;
 
 err_ctl_sock_init:
+	sctp_dbg_objcnt_exit(net);
+	sctp_proc_exit(net);
+err_init_proc:
 	return status;
 }
 
@@ -1213,6 +1211,10 @@ static void sctp_net_exit(struct net *net)
 
 	/* Free the control endpoint.  */
 	inet_ctl_sock_destroy(net->sctp.ctl_sock);
+
+	sctp_dbg_objcnt_exit(net);
+
+	sctp_proc_exit(net);
 }
 
 static struct pernet_operations sctp_net_ops = {
@@ -1259,14 +1261,6 @@ SCTP_STATIC __init int sctp_init(void)
 	if (status)
 		goto err_percpu_counter_init;
 
-	/* Initialize proc fs directory.  */
-	status = sctp_proc_init();
-	if (status)
-		goto err_init_proc;
-
-	/* Initialize object count debugging.  */
-	sctp_dbg_objcnt_init();
-
 	/*
 	 * 14. Suggested SCTP Protocol Parameter Values
 	 */
@@ -1476,9 +1470,6 @@ err_ehash_alloc:
 		   get_order(sctp_assoc_hashsize *
 			     sizeof(struct sctp_hashbucket)));
 err_ahash_alloc:
-	sctp_dbg_objcnt_exit();
-	sctp_proc_exit();
-err_init_proc:
 	percpu_counter_destroy(&sctp_sockets_allocated);
 err_percpu_counter_init:
 	cleanup_sctp_mibs();
@@ -1520,9 +1511,7 @@ SCTP_STATIC __exit void sctp_exit(void)
 		   get_order(sctp_port_hashsize *
 			     sizeof(struct sctp_bind_hashbucket)));
 
-	sctp_dbg_objcnt_exit();
 	percpu_counter_destroy(&sctp_sockets_allocated);
-	sctp_proc_exit();
 	cleanup_sctp_mibs();
 
 	rcu_barrier(); /* Wait for completion of call_rcu()'s */
-- 
cgit v1.2.3


From b01a24078fa3fc4f0f447d1306ce5adc495ead86 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Mon, 6 Aug 2012 08:47:55 +0000
Subject: sctp: Make the mib per network namespace

Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
Acked-by: Vlad Yasevich <vyasevich@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netns/sctp.h |   3 +
 include/net/sctp/sctp.h  |   9 ++-
 net/sctp/associola.c     |   2 +-
 net/sctp/chunk.c         |   2 +-
 net/sctp/endpointola.c   |   2 +-
 net/sctp/input.c         |  22 +++----
 net/sctp/ipv6.c          |   4 +-
 net/sctp/output.c        |   2 +-
 net/sctp/outqueue.c      |  18 +++---
 net/sctp/proc.c          |   5 +-
 net/sctp/protocol.c      |  27 ++++----
 net/sctp/sm_statefuns.c  | 163 ++++++++++++++++++++++++++++-------------------
 net/sctp/ulpqueue.c      |  18 ++++--
 13 files changed, 158 insertions(+), 119 deletions(-)

(limited to 'include')

diff --git a/include/net/netns/sctp.h b/include/net/netns/sctp.h
index 9c20a82a77ec..06ccddf9566c 100644
--- a/include/net/netns/sctp.h
+++ b/include/net/netns/sctp.h
@@ -3,8 +3,11 @@
 
 struct sock;
 struct proc_dir_entry;
+struct sctp_mib;
 
 struct netns_sctp {
+	DEFINE_SNMP_STAT(struct sctp_mib, sctp_statistics);
+
 #ifdef CONFIG_PROC_FS
 	struct proc_dir_entry *proc_net_sctp;
 #endif
diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
index b49588a51d80..7dcd4dfd7c3f 100644
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@ -221,11 +221,10 @@ extern struct kmem_cache *sctp_bucket_cachep __read_mostly;
 #define sctp_bh_unlock_sock(sk)  bh_unlock_sock(sk)
 
 /* SCTP SNMP MIB stats handlers */
-DECLARE_SNMP_STAT(struct sctp_mib, sctp_statistics);
-#define SCTP_INC_STATS(field)      SNMP_INC_STATS(sctp_statistics, field)
-#define SCTP_INC_STATS_BH(field)   SNMP_INC_STATS_BH(sctp_statistics, field)
-#define SCTP_INC_STATS_USER(field) SNMP_INC_STATS_USER(sctp_statistics, field)
-#define SCTP_DEC_STATS(field)      SNMP_DEC_STATS(sctp_statistics, field)
+#define SCTP_INC_STATS(net, field)      SNMP_INC_STATS((net)->sctp.sctp_statistics, field)
+#define SCTP_INC_STATS_BH(net, field)   SNMP_INC_STATS_BH((net)->sctp.sctp_statistics, field)
+#define SCTP_INC_STATS_USER(net, field) SNMP_INC_STATS_USER((net)->sctp.sctp_statistics, field)
+#define SCTP_DEC_STATS(net, field)      SNMP_DEC_STATS((net)->sctp.sctp_statistics, field)
 
 #endif /* !TEST_FRAME */
 
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index ed4930b31341..8a1f27a7a001 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -1150,7 +1150,7 @@ static void sctp_assoc_bh_rcv(struct work_struct *work)
 		if (sctp_chunk_is_data(chunk))
 			asoc->peer.last_data_from = chunk->transport;
 		else
-			SCTP_INC_STATS(SCTP_MIB_INCTRLCHUNKS);
+			SCTP_INC_STATS(sock_net(asoc->base.sk), SCTP_MIB_INCTRLCHUNKS);
 
 		if (chunk->transport)
 			chunk->transport->last_time_heard = jiffies;
diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c
index 6c8556459a75..7c2df9c33df3 100644
--- a/net/sctp/chunk.c
+++ b/net/sctp/chunk.c
@@ -257,7 +257,7 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc,
 	offset = 0;
 
 	if ((whole > 1) || (whole && over))
-		SCTP_INC_STATS_USER(SCTP_MIB_FRAGUSRMSGS);
+		SCTP_INC_STATS_USER(sock_net(asoc->base.sk), SCTP_MIB_FRAGUSRMSGS);
 
 	/* Create chunks for all the full sized DATA chunks. */
 	for (i=0, len=first_len; i < whole; i++) {
diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c
index 6b763939345b..3edca80ab3a1 100644
--- a/net/sctp/endpointola.c
+++ b/net/sctp/endpointola.c
@@ -478,7 +478,7 @@ normal:
 		if (asoc && sctp_chunk_is_data(chunk))
 			asoc->peer.last_data_from = chunk->transport;
 		else
-			SCTP_INC_STATS(SCTP_MIB_INCTRLCHUNKS);
+			SCTP_INC_STATS(sock_net(ep->base.sk), SCTP_MIB_INCTRLCHUNKS);
 
 		if (chunk->transport)
 			chunk->transport->last_time_heard = jiffies;
diff --git a/net/sctp/input.c b/net/sctp/input.c
index c9a0449bde53..530830151f04 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -83,7 +83,7 @@ static int sctp_add_backlog(struct sock *sk, struct sk_buff *skb);
 
 
 /* Calculate the SCTP checksum of an SCTP packet.  */
-static inline int sctp_rcv_checksum(struct sk_buff *skb)
+static inline int sctp_rcv_checksum(struct net *net, struct sk_buff *skb)
 {
 	struct sctphdr *sh = sctp_hdr(skb);
 	__le32 cmp = sh->checksum;
@@ -99,7 +99,7 @@ static inline int sctp_rcv_checksum(struct sk_buff *skb)
 
 	if (val != cmp) {
 		/* CRC failure, dump it. */
-		SCTP_INC_STATS_BH(SCTP_MIB_CHECKSUMERRORS);
+		SCTP_INC_STATS_BH(net, SCTP_MIB_CHECKSUMERRORS);
 		return -1;
 	}
 	return 0;
@@ -137,7 +137,7 @@ int sctp_rcv(struct sk_buff *skb)
 	if (skb->pkt_type!=PACKET_HOST)
 		goto discard_it;
 
-	SCTP_INC_STATS_BH(SCTP_MIB_INSCTPPACKS);
+	SCTP_INC_STATS_BH(net, SCTP_MIB_INSCTPPACKS);
 
 	if (skb_linearize(skb))
 		goto discard_it;
@@ -149,7 +149,7 @@ int sctp_rcv(struct sk_buff *skb)
 	if (skb->len < sizeof(struct sctphdr))
 		goto discard_it;
 	if (!sctp_checksum_disable && !skb_csum_unnecessary(skb) &&
-		  sctp_rcv_checksum(skb) < 0)
+		  sctp_rcv_checksum(net, skb) < 0)
 		goto discard_it;
 
 	skb_pull(skb, sizeof(struct sctphdr));
@@ -220,7 +220,7 @@ int sctp_rcv(struct sk_buff *skb)
 	 */
 	if (!asoc) {
 		if (sctp_rcv_ootb(skb)) {
-			SCTP_INC_STATS_BH(SCTP_MIB_OUTOFBLUES);
+			SCTP_INC_STATS_BH(net, SCTP_MIB_OUTOFBLUES);
 			goto discard_release;
 		}
 	}
@@ -276,9 +276,9 @@ int sctp_rcv(struct sk_buff *skb)
 			skb = NULL; /* sctp_chunk_free already freed the skb */
 			goto discard_release;
 		}
-		SCTP_INC_STATS_BH(SCTP_MIB_IN_PKT_BACKLOG);
+		SCTP_INC_STATS_BH(net, SCTP_MIB_IN_PKT_BACKLOG);
 	} else {
-		SCTP_INC_STATS_BH(SCTP_MIB_IN_PKT_SOFTIRQ);
+		SCTP_INC_STATS_BH(net, SCTP_MIB_IN_PKT_SOFTIRQ);
 		sctp_inq_push(&chunk->rcvr->inqueue, chunk);
 	}
 
@@ -293,7 +293,7 @@ int sctp_rcv(struct sk_buff *skb)
 	return 0;
 
 discard_it:
-	SCTP_INC_STATS_BH(SCTP_MIB_IN_PKT_DISCARDS);
+	SCTP_INC_STATS_BH(net, SCTP_MIB_IN_PKT_DISCARDS);
 	kfree_skb(skb);
 	return 0;
 
@@ -543,7 +543,7 @@ struct sock *sctp_err_lookup(struct net *net, int family, struct sk_buff *skb,
 	 * servers this needs to be solved differently.
 	 */
 	if (sock_owned_by_user(sk))
-		NET_INC_STATS_BH(&init_net, LINUX_MIB_LOCKDROPPEDICMPS);
+		NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);
 
 	*app = asoc;
 	*tpp = transport;
@@ -593,7 +593,7 @@ void sctp_v4_err(struct sk_buff *skb, __u32 info)
 	struct net *net = dev_net(skb->dev);
 
 	if (skb->len < ihlen + 8) {
-		ICMP_INC_STATS_BH(&init_net, ICMP_MIB_INERRORS);
+		ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
 		return;
 	}
 
@@ -607,7 +607,7 @@ void sctp_v4_err(struct sk_buff *skb, __u32 info)
 	skb->network_header = saveip;
 	skb->transport_header = savesctp;
 	if (!sk) {
-		ICMP_INC_STATS_BH(&init_net, ICMP_MIB_INERRORS);
+		ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
 		return;
 	}
 	/* Warning:  The sock lock is held.  Remember to call
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index a18cda60e156..ea14cb445295 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -169,7 +169,7 @@ SCTP_STATIC void sctp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 	skb->network_header   = saveip;
 	skb->transport_header = savesctp;
 	if (!sk) {
-		ICMP6_INC_STATS_BH(dev_net(skb->dev), idev, ICMP6_MIB_INERRORS);
+		ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_INERRORS);
 		goto out;
 	}
 
@@ -243,7 +243,7 @@ static int sctp_v6_xmit(struct sk_buff *skb, struct sctp_transport *transport)
 			  __func__, skb, skb->len,
 			  &fl6.saddr, &fl6.daddr);
 
-	SCTP_INC_STATS(SCTP_MIB_OUTSCTPPACKS);
+	SCTP_INC_STATS(sock_net(sk), SCTP_MIB_OUTSCTPPACKS);
 
 	if (!(transport->param_flags & SPP_PMTUD_ENABLE))
 		skb->local_df = 1;
diff --git a/net/sctp/output.c b/net/sctp/output.c
index 838e18b4d7ea..0c6359bb973c 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -597,7 +597,7 @@ out:
 	return err;
 no_route:
 	kfree_skb(nskb);
-	IP_INC_STATS_BH(&init_net, IPSTATS_MIB_OUTNOROUTES);
+	IP_INC_STATS_BH(sock_net(asoc->base.sk), IPSTATS_MIB_OUTNOROUTES);
 
 	/* FIXME: Returning the 'err' will effect all the associations
 	 * associated with a socket, although only one of the paths of the
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index e7aa177c9522..072bf6ae3c26 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -299,6 +299,7 @@ void sctp_outq_free(struct sctp_outq *q)
 /* Put a new chunk in an sctp_outq.  */
 int sctp_outq_tail(struct sctp_outq *q, struct sctp_chunk *chunk)
 {
+	struct net *net = sock_net(q->asoc->base.sk);
 	int error = 0;
 
 	SCTP_DEBUG_PRINTK("sctp_outq_tail(%p, %p[%s])\n",
@@ -337,15 +338,15 @@ int sctp_outq_tail(struct sctp_outq *q, struct sctp_chunk *chunk)
 
 			sctp_outq_tail_data(q, chunk);
 			if (chunk->chunk_hdr->flags & SCTP_DATA_UNORDERED)
-				SCTP_INC_STATS(SCTP_MIB_OUTUNORDERCHUNKS);
+				SCTP_INC_STATS(net, SCTP_MIB_OUTUNORDERCHUNKS);
 			else
-				SCTP_INC_STATS(SCTP_MIB_OUTORDERCHUNKS);
+				SCTP_INC_STATS(net, SCTP_MIB_OUTORDERCHUNKS);
 			q->empty = 0;
 			break;
 		}
 	} else {
 		list_add_tail(&chunk->list, &q->control_chunk_list);
-		SCTP_INC_STATS(SCTP_MIB_OUTCTRLCHUNKS);
+		SCTP_INC_STATS(net, SCTP_MIB_OUTCTRLCHUNKS);
 	}
 
 	if (error < 0)
@@ -478,11 +479,12 @@ void sctp_retransmit_mark(struct sctp_outq *q,
 void sctp_retransmit(struct sctp_outq *q, struct sctp_transport *transport,
 		     sctp_retransmit_reason_t reason)
 {
+	struct net *net = sock_net(q->asoc->base.sk);
 	int error = 0;
 
 	switch(reason) {
 	case SCTP_RTXR_T3_RTX:
-		SCTP_INC_STATS(SCTP_MIB_T3_RETRANSMITS);
+		SCTP_INC_STATS(net, SCTP_MIB_T3_RETRANSMITS);
 		sctp_transport_lower_cwnd(transport, SCTP_LOWER_CWND_T3_RTX);
 		/* Update the retran path if the T3-rtx timer has expired for
 		 * the current retran path.
@@ -493,15 +495,15 @@ void sctp_retransmit(struct sctp_outq *q, struct sctp_transport *transport,
 			transport->asoc->unack_data;
 		break;
 	case SCTP_RTXR_FAST_RTX:
-		SCTP_INC_STATS(SCTP_MIB_FAST_RETRANSMITS);
+		SCTP_INC_STATS(net, SCTP_MIB_FAST_RETRANSMITS);
 		sctp_transport_lower_cwnd(transport, SCTP_LOWER_CWND_FAST_RTX);
 		q->fast_rtx = 1;
 		break;
 	case SCTP_RTXR_PMTUD:
-		SCTP_INC_STATS(SCTP_MIB_PMTUD_RETRANSMITS);
+		SCTP_INC_STATS(net, SCTP_MIB_PMTUD_RETRANSMITS);
 		break;
 	case SCTP_RTXR_T1_RTX:
-		SCTP_INC_STATS(SCTP_MIB_T1_RETRANSMITS);
+		SCTP_INC_STATS(net, SCTP_MIB_T1_RETRANSMITS);
 		transport->asoc->init_retries++;
 		break;
 	default:
@@ -1914,6 +1916,6 @@ static void sctp_generate_fwdtsn(struct sctp_outq *q, __u32 ctsn)
 
 	if (ftsn_chunk) {
 		list_add_tail(&ftsn_chunk->list, &q->control_chunk_list);
-		SCTP_INC_STATS(SCTP_MIB_OUTCTRLCHUNKS);
+		SCTP_INC_STATS(sock_net(asoc->base.sk), SCTP_MIB_OUTCTRLCHUNKS);
 	}
 }
diff --git a/net/sctp/proc.c b/net/sctp/proc.c
index dc79a3aa0382..3e62ee55228f 100644
--- a/net/sctp/proc.c
+++ b/net/sctp/proc.c
@@ -83,12 +83,9 @@ static int sctp_snmp_seq_show(struct seq_file *seq, void *v)
 	struct net *net = seq->private;
 	int i;
 
-	if (!net_eq(net, &init_net))
-		return 0;
-
 	for (i = 0; sctp_snmp_list[i].name != NULL; i++)
 		seq_printf(seq, "%-32s\t%ld\n", sctp_snmp_list[i].name,
-			   snmp_fold_field((void __percpu **)sctp_statistics,
+			   snmp_fold_field((void __percpu **)net->sctp.sctp_statistics,
 				      sctp_snmp_list[i].entry));
 
 	return 0;
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 059c914c09f2..d58db315db85 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -69,7 +69,6 @@
 
 /* Global data structures. */
 struct sctp_globals sctp_globals __read_mostly;
-DEFINE_SNMP_STAT(struct sctp_mib, sctp_statistics) __read_mostly;
 
 struct idr sctp_assocs_id;
 DEFINE_SPINLOCK(sctp_assocs_id_lock);
@@ -961,7 +960,7 @@ static inline int sctp_v4_xmit(struct sk_buff *skb,
 	inet->pmtudisc = transport->param_flags & SPP_PMTUD_ENABLE ?
 			 IP_PMTUDISC_DO : IP_PMTUDISC_DONT;
 
-	SCTP_INC_STATS(SCTP_MIB_OUTSCTPPACKS);
+	SCTP_INC_STATS(sock_net(&inet->sk), SCTP_MIB_OUTSCTPPACKS);
 	return ip_queue_xmit(skb, &transport->fl);
 }
 
@@ -1102,16 +1101,16 @@ int sctp_register_pf(struct sctp_pf *pf, sa_family_t family)
 	return 1;
 }
 
-static inline int init_sctp_mibs(void)
+static inline int init_sctp_mibs(struct net *net)
 {
-	return snmp_mib_init((void __percpu **)sctp_statistics,
+	return snmp_mib_init((void __percpu **)net->sctp.sctp_statistics,
 			     sizeof(struct sctp_mib),
 			     __alignof__(struct sctp_mib));
 }
 
-static inline void cleanup_sctp_mibs(void)
+static inline void cleanup_sctp_mibs(struct net *net)
 {
-	snmp_mib_free((void __percpu **)sctp_statistics);
+	snmp_mib_free((void __percpu **)net->sctp.sctp_statistics);
 }
 
 static void sctp_v4_pf_init(void)
@@ -1170,6 +1169,11 @@ static int sctp_net_init(struct net *net)
 {
 	int status;
 
+	/* Allocate and initialise sctp mibs.  */
+	status = init_sctp_mibs(net);
+	if (status)
+		goto err_init_mibs;
+
 	/* Initialize proc fs directory.  */
 	status = sctp_proc_init(net);
 	if (status)
@@ -1202,6 +1206,8 @@ err_ctl_sock_init:
 	sctp_dbg_objcnt_exit(net);
 	sctp_proc_exit(net);
 err_init_proc:
+	cleanup_sctp_mibs(net);
+err_init_mibs:
 	return status;
 }
 
@@ -1217,6 +1223,7 @@ static void sctp_net_exit(struct net *net)
 	sctp_dbg_objcnt_exit(net);
 
 	sctp_proc_exit(net);
+	cleanup_sctp_mibs(net);
 }
 
 static struct pernet_operations sctp_net_ops = {
@@ -1254,11 +1261,6 @@ SCTP_STATIC __init int sctp_init(void)
 	if (!sctp_chunk_cachep)
 		goto err_chunk_cachep;
 
-	/* Allocate and initialise sctp mibs.  */
-	status = init_sctp_mibs();
-	if (status)
-		goto err_init_mibs;
-
 	status = percpu_counter_init(&sctp_sockets_allocated, 0);
 	if (status)
 		goto err_percpu_counter_init;
@@ -1474,8 +1476,6 @@ err_ehash_alloc:
 err_ahash_alloc:
 	percpu_counter_destroy(&sctp_sockets_allocated);
 err_percpu_counter_init:
-	cleanup_sctp_mibs();
-err_init_mibs:
 	kmem_cache_destroy(sctp_chunk_cachep);
 err_chunk_cachep:
 	kmem_cache_destroy(sctp_bucket_cachep);
@@ -1514,7 +1514,6 @@ SCTP_STATIC __exit void sctp_exit(void)
 			     sizeof(struct sctp_bind_hashbucket)));
 
 	percpu_counter_destroy(&sctp_sockets_allocated);
-	cleanup_sctp_mibs();
 
 	rcu_barrier(); /* Wait for completion of call_rcu()'s */
 
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index f2daf615e8fe..bee5e2c288d8 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -213,6 +213,7 @@ sctp_disposition_t sctp_sf_do_4_C(const struct sctp_endpoint *ep,
 {
 	struct sctp_chunk *chunk = arg;
 	struct sctp_ulpevent *ev;
+	struct net *net;
 
 	if (!sctp_vtag_verify_either(chunk, asoc))
 		return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
@@ -260,8 +261,9 @@ sctp_disposition_t sctp_sf_do_4_C(const struct sctp_endpoint *ep,
 	sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE,
 			SCTP_STATE(SCTP_STATE_CLOSED));
 
-	SCTP_INC_STATS(SCTP_MIB_SHUTDOWNS);
-	SCTP_DEC_STATS(SCTP_MIB_CURRESTAB);
+	net = sock_net(asoc->base.sk);
+	SCTP_INC_STATS(net, SCTP_MIB_SHUTDOWNS);
+	SCTP_DEC_STATS(net, SCTP_MIB_CURRESTAB);
 
 	sctp_add_cmd_sf(commands, SCTP_CMD_DELETE_TCB, SCTP_NULL());
 
@@ -322,7 +324,7 @@ sctp_disposition_t sctp_sf_do_5_1B_init(const struct sctp_endpoint *ep,
 	 */
 	net = sock_net(ep->base.sk);
 	if (ep == sctp_sk(net->sctp.ctl_sock)->ep) {
-		SCTP_INC_STATS(SCTP_MIB_OUTOFBLUES);
+		SCTP_INC_STATS(net, SCTP_MIB_OUTOFBLUES);
 		return sctp_sf_tabort_8_4_8(ep, asoc, type, arg, commands);
 	}
 
@@ -369,7 +371,7 @@ sctp_disposition_t sctp_sf_do_5_1B_init(const struct sctp_endpoint *ep,
 			if (packet) {
 				sctp_add_cmd_sf(commands, SCTP_CMD_SEND_PKT,
 						SCTP_PACKET(packet));
-				SCTP_INC_STATS(SCTP_MIB_OUTCTRLCHUNKS);
+				SCTP_INC_STATS(net, SCTP_MIB_OUTCTRLCHUNKS);
 				return SCTP_DISPOSITION_CONSUME;
 			} else {
 				return SCTP_DISPOSITION_NOMEM;
@@ -540,7 +542,7 @@ sctp_disposition_t sctp_sf_do_5_1C_ack(const struct sctp_endpoint *ep,
 			if (packet) {
 				sctp_add_cmd_sf(commands, SCTP_CMD_SEND_PKT,
 						SCTP_PACKET(packet));
-				SCTP_INC_STATS(SCTP_MIB_OUTCTRLCHUNKS);
+				SCTP_INC_STATS(sock_net(ep->base.sk), SCTP_MIB_OUTCTRLCHUNKS);
 				error = SCTP_ERROR_INV_PARAM;
 			}
 		}
@@ -559,7 +561,7 @@ sctp_disposition_t sctp_sf_do_5_1C_ack(const struct sctp_endpoint *ep,
 		if (sctp_auth_recv_cid(SCTP_CID_ABORT, asoc))
 			return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
 
-		SCTP_INC_STATS(SCTP_MIB_ABORTEDS);
+		SCTP_INC_STATS(sock_net(ep->base.sk), SCTP_MIB_ABORTEDS);
 		return sctp_stop_t1_and_abort(commands, error, ECONNREFUSED,
 						asoc, chunk->transport);
 	}
@@ -656,7 +658,7 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(const struct sctp_endpoint *ep,
 	 */
 	net = sock_net(ep->base.sk);
 	if (ep == sctp_sk(net->sctp.ctl_sock)->ep) {
-		SCTP_INC_STATS(SCTP_MIB_OUTOFBLUES);
+		SCTP_INC_STATS(net, SCTP_MIB_OUTOFBLUES);
 		return sctp_sf_tabort_8_4_8(ep, asoc, type, arg, commands);
 	}
 
@@ -809,8 +811,8 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(const struct sctp_endpoint *ep,
 	sctp_add_cmd_sf(commands, SCTP_CMD_NEW_ASOC, SCTP_ASOC(new_asoc));
 	sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE,
 			SCTP_STATE(SCTP_STATE_ESTABLISHED));
-	SCTP_INC_STATS(SCTP_MIB_CURRESTAB);
-	SCTP_INC_STATS(SCTP_MIB_PASSIVEESTABS);
+	SCTP_INC_STATS(net, SCTP_MIB_CURRESTAB);
+	SCTP_INC_STATS(net, SCTP_MIB_PASSIVEESTABS);
 	sctp_add_cmd_sf(commands, SCTP_CMD_HB_TIMERS_START, SCTP_NULL());
 
 	if (new_asoc->autoclose)
@@ -868,6 +870,7 @@ sctp_disposition_t sctp_sf_do_5_1E_ca(const struct sctp_endpoint *ep,
 {
 	struct sctp_chunk *chunk = arg;
 	struct sctp_ulpevent *ev;
+	struct net *net;
 
 	if (!sctp_vtag_verify(chunk, asoc))
 		return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
@@ -897,8 +900,9 @@ sctp_disposition_t sctp_sf_do_5_1E_ca(const struct sctp_endpoint *ep,
 			SCTP_TO(SCTP_EVENT_TIMEOUT_T1_COOKIE));
 	sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE,
 			SCTP_STATE(SCTP_STATE_ESTABLISHED));
-	SCTP_INC_STATS(SCTP_MIB_CURRESTAB);
-	SCTP_INC_STATS(SCTP_MIB_ACTIVEESTABS);
+	net = sock_net(ep->base.sk);
+	SCTP_INC_STATS(net, SCTP_MIB_CURRESTAB);
+	SCTP_INC_STATS(net, SCTP_MIB_ACTIVEESTABS);
 	sctp_add_cmd_sf(commands, SCTP_CMD_HB_TIMERS_START, SCTP_NULL());
 	if (asoc->autoclose)
 		sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_START,
@@ -972,13 +976,15 @@ sctp_disposition_t sctp_sf_sendbeat_8_3(const struct sctp_endpoint *ep,
 	struct sctp_transport *transport = (struct sctp_transport *) arg;
 
 	if (asoc->overall_error_count >= asoc->max_retrans) {
+		struct net *net;
 		sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR,
 				SCTP_ERROR(ETIMEDOUT));
 		/* CMD_ASSOC_FAILED calls CMD_DELETE_TCB. */
 		sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED,
 				SCTP_PERR(SCTP_ERROR_NO_ERROR));
-		SCTP_INC_STATS(SCTP_MIB_ABORTEDS);
-		SCTP_DEC_STATS(SCTP_MIB_CURRESTAB);
+		net = sock_net(ep->base.sk);
+		SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS);
+		SCTP_DEC_STATS(net, SCTP_MIB_CURRESTAB);
 		return SCTP_DISPOSITION_DELETE_TCB;
 	}
 
@@ -1213,7 +1219,7 @@ static int sctp_sf_send_restart_abort(struct net *net, union sctp_addr *ssa,
 		goto out;
 	sctp_add_cmd_sf(commands, SCTP_CMD_SEND_PKT, SCTP_PACKET(pkt));
 
-	SCTP_INC_STATS(SCTP_MIB_OUTCTRLCHUNKS);
+	SCTP_INC_STATS(net, SCTP_MIB_OUTCTRLCHUNKS);
 
 	/* Discard the rest of the inbound packet. */
 	sctp_add_cmd_sf(commands, SCTP_CMD_DISCARD_PACKET, SCTP_NULL());
@@ -1427,7 +1433,7 @@ static sctp_disposition_t sctp_sf_do_unexpected_init(
 			if (packet) {
 				sctp_add_cmd_sf(commands, SCTP_CMD_SEND_PKT,
 						SCTP_PACKET(packet));
-				SCTP_INC_STATS(SCTP_MIB_OUTCTRLCHUNKS);
+				SCTP_INC_STATS(sock_net(ep->base.sk), SCTP_MIB_OUTCTRLCHUNKS);
 				retval = SCTP_DISPOSITION_CONSUME;
 			} else {
 				retval = SCTP_DISPOSITION_NOMEM;
@@ -1791,7 +1797,7 @@ static sctp_disposition_t sctp_sf_do_dupcook_b(const struct sctp_endpoint *ep,
 	sctp_add_cmd_sf(commands, SCTP_CMD_UPDATE_ASSOC, SCTP_ASOC(new_asoc));
 	sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE,
 			SCTP_STATE(SCTP_STATE_ESTABLISHED));
-	SCTP_INC_STATS(SCTP_MIB_CURRESTAB);
+	SCTP_INC_STATS(sock_net(new_asoc->base.sk), SCTP_MIB_CURRESTAB);
 	sctp_add_cmd_sf(commands, SCTP_CMD_HB_TIMERS_START, SCTP_NULL());
 
 	repl = sctp_make_cookie_ack(new_asoc, chunk);
@@ -1883,7 +1889,7 @@ static sctp_disposition_t sctp_sf_do_dupcook_d(const struct sctp_endpoint *ep,
 				SCTP_TO(SCTP_EVENT_TIMEOUT_T1_COOKIE));
 		sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE,
 				SCTP_STATE(SCTP_STATE_ESTABLISHED));
-		SCTP_INC_STATS(SCTP_MIB_CURRESTAB);
+		SCTP_INC_STATS(sock_net(asoc->base.sk), SCTP_MIB_CURRESTAB);
 		sctp_add_cmd_sf(commands, SCTP_CMD_HB_TIMERS_START,
 				SCTP_NULL());
 
@@ -2417,6 +2423,7 @@ static sctp_disposition_t __sctp_sf_do_9_1_abort(const struct sctp_endpoint *ep,
 	struct sctp_chunk *chunk = arg;
 	unsigned int len;
 	__be16 error = SCTP_ERROR_NO_ERROR;
+	struct net *net;
 
 	/* See if we have an error cause code in the chunk.  */
 	len = ntohs(chunk->chunk_hdr->length);
@@ -2433,8 +2440,9 @@ static sctp_disposition_t __sctp_sf_do_9_1_abort(const struct sctp_endpoint *ep,
 	sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR, SCTP_ERROR(ECONNRESET));
 	/* ASSOC_FAILED will DELETE_TCB. */
 	sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED, SCTP_PERR(error));
-	SCTP_INC_STATS(SCTP_MIB_ABORTEDS);
-	SCTP_DEC_STATS(SCTP_MIB_CURRESTAB);
+	net = sock_net(ep->base.sk);
+	SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS);
+	SCTP_DEC_STATS(net, SCTP_MIB_CURRESTAB);
 
 	return SCTP_DISPOSITION_ABORT;
 }
@@ -2521,7 +2529,7 @@ static sctp_disposition_t sctp_stop_t1_and_abort(sctp_cmd_seq_t *commands,
 	SCTP_DEBUG_PRINTK("ABORT received (INIT).\n");
 	sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE,
 			SCTP_STATE(SCTP_STATE_CLOSED));
-	SCTP_INC_STATS(SCTP_MIB_ABORTEDS);
+	SCTP_INC_STATS(sock_net(asoc->base.sk), SCTP_MIB_ABORTEDS);
 	sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP,
 			SCTP_TO(SCTP_EVENT_TIMEOUT_T1_INIT));
 	sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR, SCTP_ERROR(sk_err));
@@ -2904,11 +2912,11 @@ sctp_disposition_t sctp_sf_eat_data_6_2(const struct sctp_endpoint *ep,
 		break;
 	case SCTP_IERROR_HIGH_TSN:
 	case SCTP_IERROR_BAD_STREAM:
-		SCTP_INC_STATS(SCTP_MIB_IN_DATA_CHUNK_DISCARDS);
+		SCTP_INC_STATS(sock_net(asoc->base.sk), SCTP_MIB_IN_DATA_CHUNK_DISCARDS);
 		goto discard_noforce;
 	case SCTP_IERROR_DUP_TSN:
 	case SCTP_IERROR_IGNORE_TSN:
-		SCTP_INC_STATS(SCTP_MIB_IN_DATA_CHUNK_DISCARDS);
+		SCTP_INC_STATS(sock_net(asoc->base.sk), SCTP_MIB_IN_DATA_CHUNK_DISCARDS);
 		goto discard_force;
 	case SCTP_IERROR_NO_DATA:
 		goto consume;
@@ -3197,7 +3205,7 @@ static sctp_disposition_t sctp_sf_tabort_8_4_8(const struct sctp_endpoint *ep,
 		sctp_add_cmd_sf(commands, SCTP_CMD_SEND_PKT,
 				SCTP_PACKET(packet));
 
-		SCTP_INC_STATS(SCTP_MIB_OUTCTRLCHUNKS);
+		SCTP_INC_STATS(net, SCTP_MIB_OUTCTRLCHUNKS);
 
 		sctp_sf_pdiscard(ep, asoc, type, arg, commands);
 		return SCTP_DISPOSITION_CONSUME;
@@ -3260,6 +3268,7 @@ sctp_disposition_t sctp_sf_do_9_2_final(const struct sctp_endpoint *ep,
 	struct sctp_chunk *chunk = arg;
 	struct sctp_chunk *reply;
 	struct sctp_ulpevent *ev;
+	struct net *net;
 
 	if (!sctp_vtag_verify(chunk, asoc))
 		return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
@@ -3299,8 +3308,9 @@ sctp_disposition_t sctp_sf_do_9_2_final(const struct sctp_endpoint *ep,
 
 	sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE,
 			SCTP_STATE(SCTP_STATE_CLOSED));
-	SCTP_INC_STATS(SCTP_MIB_SHUTDOWNS);
-	SCTP_DEC_STATS(SCTP_MIB_CURRESTAB);
+	net = sock_net(asoc->base.sk);
+	SCTP_INC_STATS(net, SCTP_MIB_SHUTDOWNS);
+	SCTP_DEC_STATS(net, SCTP_MIB_CURRESTAB);
 	sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(reply));
 
 	/* ...and remove all record of the association. */
@@ -3346,8 +3356,10 @@ sctp_disposition_t sctp_sf_ootb(const struct sctp_endpoint *ep,
 	__u8 *ch_end;
 	int ootb_shut_ack = 0;
 	int ootb_cookie_ack = 0;
+	struct net *net;
 
-	SCTP_INC_STATS(SCTP_MIB_OUTOFBLUES);
+	net = sock_net(asoc->base.sk);
+	SCTP_INC_STATS(net, SCTP_MIB_OUTOFBLUES);
 
 	ch = (sctp_chunkhdr_t *) chunk->chunk_hdr;
 	do {
@@ -3461,7 +3473,7 @@ static sctp_disposition_t sctp_sf_shut_8_4_5(const struct sctp_endpoint *ep,
 		sctp_add_cmd_sf(commands, SCTP_CMD_SEND_PKT,
 				SCTP_PACKET(packet));
 
-		SCTP_INC_STATS(SCTP_MIB_OUTCTRLCHUNKS);
+		SCTP_INC_STATS(net, SCTP_MIB_OUTCTRLCHUNKS);
 
 		/* If the chunk length is invalid, we don't want to process
 		 * the reset of the packet.
@@ -3508,7 +3520,7 @@ sctp_disposition_t sctp_sf_do_8_5_1_E_sa(const struct sctp_endpoint *ep,
 	 * packet and the state function that handles OOTB SHUTDOWN_ACK is
 	 * called with a NULL association.
 	 */
-	SCTP_INC_STATS(SCTP_MIB_OUTOFBLUES);
+	SCTP_INC_STATS(sock_net(ep->base.sk), SCTP_MIB_OUTOFBLUES);
 
 	return sctp_sf_shut_8_4_5(ep, NULL, type, arg, commands);
 }
@@ -3699,6 +3711,7 @@ sctp_disposition_t sctp_sf_do_asconf_ack(const struct sctp_endpoint *ep,
 	 */
 	if (ADDIP_SERIAL_gte(rcvd_serial, sent_serial + 1) &&
 	    !(asoc->addip_last_asconf)) {
+		struct net *net;
 		abort = sctp_make_abort(asoc, asconf_ack,
 					sizeof(sctp_errhdr_t));
 		if (abort) {
@@ -3716,12 +3729,14 @@ sctp_disposition_t sctp_sf_do_asconf_ack(const struct sctp_endpoint *ep,
 				SCTP_ERROR(ECONNABORTED));
 		sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED,
 				SCTP_PERR(SCTP_ERROR_ASCONF_ACK));
-		SCTP_INC_STATS(SCTP_MIB_ABORTEDS);
-		SCTP_DEC_STATS(SCTP_MIB_CURRESTAB);
+		net = sock_net(asoc->base.sk);
+		SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS);
+		SCTP_DEC_STATS(net, SCTP_MIB_CURRESTAB);
 		return SCTP_DISPOSITION_ABORT;
 	}
 
 	if ((rcvd_serial == sent_serial) && asoc->addip_last_asconf) {
+		struct net *net;
 		sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP,
 				SCTP_TO(SCTP_EVENT_TIMEOUT_T4_RTO));
 
@@ -3750,8 +3765,9 @@ sctp_disposition_t sctp_sf_do_asconf_ack(const struct sctp_endpoint *ep,
 				SCTP_ERROR(ECONNABORTED));
 		sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED,
 				SCTP_PERR(SCTP_ERROR_ASCONF_ACK));
-		SCTP_INC_STATS(SCTP_MIB_ABORTEDS);
-		SCTP_DEC_STATS(SCTP_MIB_CURRESTAB);
+		net = sock_net(asoc->base.sk);
+		SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS);
+		SCTP_DEC_STATS(net, SCTP_MIB_CURRESTAB);
 		return SCTP_DISPOSITION_ABORT;
 	}
 
@@ -4222,7 +4238,7 @@ sctp_disposition_t sctp_sf_pdiscard(const struct sctp_endpoint *ep,
 				    void *arg,
 				    sctp_cmd_seq_t *commands)
 {
-	SCTP_INC_STATS(SCTP_MIB_IN_PKT_DISCARDS);
+	SCTP_INC_STATS(sock_net(asoc->base.sk), SCTP_MIB_IN_PKT_DISCARDS);
 	sctp_add_cmd_sf(commands, SCTP_CMD_DISCARD_PACKET, SCTP_NULL());
 
 	return SCTP_DISPOSITION_CONSUME;
@@ -4315,7 +4331,7 @@ static sctp_disposition_t sctp_sf_abort_violation(
 		}
 
 		sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(abort));
-		SCTP_INC_STATS(SCTP_MIB_OUTCTRLCHUNKS);
+		SCTP_INC_STATS(net, SCTP_MIB_OUTCTRLCHUNKS);
 
 		if (asoc->state <= SCTP_STATE_COOKIE_ECHOED) {
 			sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP,
@@ -4329,7 +4345,7 @@ static sctp_disposition_t sctp_sf_abort_violation(
 					SCTP_ERROR(ECONNABORTED));
 			sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED,
 					SCTP_PERR(SCTP_ERROR_PROTO_VIOLATION));
-			SCTP_DEC_STATS(SCTP_MIB_CURRESTAB);
+			SCTP_DEC_STATS(net, SCTP_MIB_CURRESTAB);
 		}
 	} else {
 		packet = sctp_ootb_pkt_new(net, asoc, chunk);
@@ -4347,10 +4363,10 @@ static sctp_disposition_t sctp_sf_abort_violation(
 		sctp_add_cmd_sf(commands, SCTP_CMD_SEND_PKT,
 			SCTP_PACKET(packet));
 
-		SCTP_INC_STATS(SCTP_MIB_OUTCTRLCHUNKS);
+		SCTP_INC_STATS(net, SCTP_MIB_OUTCTRLCHUNKS);
 	}
 
-	SCTP_INC_STATS(SCTP_MIB_ABORTEDS);
+	SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS);
 
 discard:
 	sctp_sf_pdiscard(ep, asoc, SCTP_ST_CHUNK(0), arg, commands);
@@ -4410,6 +4426,7 @@ static sctp_disposition_t sctp_sf_violation_paramlen(
 	struct sctp_chunk *chunk =  arg;
 	struct sctp_paramhdr *param = ext;
 	struct sctp_chunk *abort = NULL;
+	struct net *net;
 
 	if (sctp_auth_recv_cid(SCTP_CID_ABORT, asoc))
 		goto discard;
@@ -4419,15 +4436,16 @@ static sctp_disposition_t sctp_sf_violation_paramlen(
 	if (!abort)
 		goto nomem;
 
+	net = sock_net(asoc->base.sk);
 	sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(abort));
-	SCTP_INC_STATS(SCTP_MIB_OUTCTRLCHUNKS);
+	SCTP_INC_STATS(net, SCTP_MIB_OUTCTRLCHUNKS);
 
 	sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR,
 			SCTP_ERROR(ECONNABORTED));
 	sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED,
 			SCTP_PERR(SCTP_ERROR_PROTO_VIOLATION));
-	SCTP_DEC_STATS(SCTP_MIB_CURRESTAB);
-	SCTP_INC_STATS(SCTP_MIB_ABORTEDS);
+	SCTP_DEC_STATS(net, SCTP_MIB_CURRESTAB);
+	SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS);
 
 discard:
 	sctp_sf_pdiscard(ep, asoc, SCTP_ST_CHUNK(0), arg, commands);
@@ -4757,6 +4775,7 @@ sctp_disposition_t sctp_sf_do_9_1_prm_abort(
 	 */
 	struct sctp_chunk *abort = arg;
 	sctp_disposition_t retval;
+	struct net *net;
 
 	retval = SCTP_DISPOSITION_CONSUME;
 
@@ -4772,8 +4791,9 @@ sctp_disposition_t sctp_sf_do_9_1_prm_abort(
 	sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED,
 			SCTP_PERR(SCTP_ERROR_USER_ABORT));
 
-	SCTP_INC_STATS(SCTP_MIB_ABORTEDS);
-	SCTP_DEC_STATS(SCTP_MIB_CURRESTAB);
+	net = sock_net(asoc->base.sk);
+	SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS);
+	SCTP_DEC_STATS(net, SCTP_MIB_CURRESTAB);
 
 	return retval;
 }
@@ -4824,13 +4844,15 @@ sctp_disposition_t sctp_sf_cookie_wait_prm_shutdown(
 	void *arg,
 	sctp_cmd_seq_t *commands)
 {
+	struct net *net = sock_net(asoc->base.sk);
+
 	sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP,
 			SCTP_TO(SCTP_EVENT_TIMEOUT_T1_INIT));
 
 	sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE,
 			SCTP_STATE(SCTP_STATE_CLOSED));
 
-	SCTP_INC_STATS(SCTP_MIB_SHUTDOWNS);
+	SCTP_INC_STATS(net, SCTP_MIB_SHUTDOWNS);
 
 	sctp_add_cmd_sf(commands, SCTP_CMD_DELETE_TCB, SCTP_NULL());
 
@@ -4886,6 +4908,7 @@ sctp_disposition_t sctp_sf_cookie_wait_prm_abort(
 {
 	struct sctp_chunk *abort = arg;
 	sctp_disposition_t retval;
+	struct net *net = sock_net(asoc->base.sk);
 
 	/* Stop T1-init timer */
 	sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP,
@@ -4897,7 +4920,7 @@ sctp_disposition_t sctp_sf_cookie_wait_prm_abort(
 	sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE,
 			SCTP_STATE(SCTP_STATE_CLOSED));
 
-	SCTP_INC_STATS(SCTP_MIB_ABORTEDS);
+	SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS);
 
 	/* Even if we can't send the ABORT due to low memory delete the
 	 * TCB.  This is a departure from our typical NOMEM handling.
@@ -5318,8 +5341,9 @@ sctp_disposition_t sctp_sf_do_6_3_3_rtx(const struct sctp_endpoint *ep,
 					sctp_cmd_seq_t *commands)
 {
 	struct sctp_transport *transport = arg;
+	struct net *net = sock_net(asoc->base.sk);
 
-	SCTP_INC_STATS(SCTP_MIB_T3_RTX_EXPIREDS);
+	SCTP_INC_STATS(net, SCTP_MIB_T3_RTX_EXPIREDS);
 
 	if (asoc->overall_error_count >= asoc->max_retrans) {
 		if (asoc->state == SCTP_STATE_SHUTDOWN_PENDING) {
@@ -5340,8 +5364,8 @@ sctp_disposition_t sctp_sf_do_6_3_3_rtx(const struct sctp_endpoint *ep,
 			/* CMD_ASSOC_FAILED calls CMD_DELETE_TCB. */
 			sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED,
 					SCTP_PERR(SCTP_ERROR_NO_ERROR));
-			SCTP_INC_STATS(SCTP_MIB_ABORTEDS);
-			SCTP_DEC_STATS(SCTP_MIB_CURRESTAB);
+			SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS);
+			SCTP_DEC_STATS(net, SCTP_MIB_CURRESTAB);
 			return SCTP_DISPOSITION_DELETE_TCB;
 		}
 	}
@@ -5403,7 +5427,8 @@ sctp_disposition_t sctp_sf_do_6_2_sack(const struct sctp_endpoint *ep,
 				       void *arg,
 				       sctp_cmd_seq_t *commands)
 {
-	SCTP_INC_STATS(SCTP_MIB_DELAY_SACK_EXPIREDS);
+	struct net *net = sock_net(asoc->base.sk);
+	SCTP_INC_STATS(net, SCTP_MIB_DELAY_SACK_EXPIREDS);
 	sctp_add_cmd_sf(commands, SCTP_CMD_GEN_SACK, SCTP_FORCE());
 	return SCTP_DISPOSITION_CONSUME;
 }
@@ -5436,9 +5461,10 @@ sctp_disposition_t sctp_sf_t1_init_timer_expire(const struct sctp_endpoint *ep,
 	struct sctp_chunk *repl = NULL;
 	struct sctp_bind_addr *bp;
 	int attempts = asoc->init_err_counter + 1;
+	struct net *net = sock_net(asoc->base.sk);
 
 	SCTP_DEBUG_PRINTK("Timer T1 expired (INIT).\n");
-	SCTP_INC_STATS(SCTP_MIB_T1_INIT_EXPIREDS);
+	SCTP_INC_STATS(net, SCTP_MIB_T1_INIT_EXPIREDS);
 
 	if (attempts <= asoc->max_init_attempts) {
 		bp = (struct sctp_bind_addr *) &asoc->base.bind_addr;
@@ -5496,9 +5522,10 @@ sctp_disposition_t sctp_sf_t1_cookie_timer_expire(const struct sctp_endpoint *ep
 {
 	struct sctp_chunk *repl = NULL;
 	int attempts = asoc->init_err_counter + 1;
+	struct net *net = sock_net(asoc->base.sk);
 
 	SCTP_DEBUG_PRINTK("Timer T1 expired (COOKIE-ECHO).\n");
-	SCTP_INC_STATS(SCTP_MIB_T1_COOKIE_EXPIREDS);
+	SCTP_INC_STATS(net, SCTP_MIB_T1_COOKIE_EXPIREDS);
 
 	if (attempts <= asoc->max_init_attempts) {
 		repl = sctp_make_cookie_echo(asoc, NULL);
@@ -5543,9 +5570,10 @@ sctp_disposition_t sctp_sf_t2_timer_expire(const struct sctp_endpoint *ep,
 					   sctp_cmd_seq_t *commands)
 {
 	struct sctp_chunk *reply = NULL;
+	struct net *net = sock_net(asoc->base.sk);
 
 	SCTP_DEBUG_PRINTK("Timer T2 expired.\n");
-	SCTP_INC_STATS(SCTP_MIB_T2_SHUTDOWN_EXPIREDS);
+	SCTP_INC_STATS(net, SCTP_MIB_T2_SHUTDOWN_EXPIREDS);
 
 	((struct sctp_association *)asoc)->shutdown_retries++;
 
@@ -5555,8 +5583,8 @@ sctp_disposition_t sctp_sf_t2_timer_expire(const struct sctp_endpoint *ep,
 		/* Note:  CMD_ASSOC_FAILED calls CMD_DELETE_TCB. */
 		sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED,
 				SCTP_PERR(SCTP_ERROR_NO_ERROR));
-		SCTP_INC_STATS(SCTP_MIB_ABORTEDS);
-		SCTP_DEC_STATS(SCTP_MIB_CURRESTAB);
+		SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS);
+		SCTP_DEC_STATS(net, SCTP_MIB_CURRESTAB);
 		return SCTP_DISPOSITION_DELETE_TCB;
 	}
 
@@ -5613,8 +5641,9 @@ sctp_disposition_t sctp_sf_t4_timer_expire(
 {
 	struct sctp_chunk *chunk = asoc->addip_last_asconf;
 	struct sctp_transport *transport = chunk->transport;
+	struct net *net = sock_net(asoc->base.sk);
 
-	SCTP_INC_STATS(SCTP_MIB_T4_RTO_EXPIREDS);
+	SCTP_INC_STATS(net, SCTP_MIB_T4_RTO_EXPIREDS);
 
 	/* ADDIP 4.1 B1) Increment the error counters and perform path failure
 	 * detection on the appropriate destination address as defined in
@@ -5639,8 +5668,8 @@ sctp_disposition_t sctp_sf_t4_timer_expire(
 				SCTP_ERROR(ETIMEDOUT));
 		sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED,
 				SCTP_PERR(SCTP_ERROR_NO_ERROR));
-		SCTP_INC_STATS(SCTP_MIB_ABORTEDS);
-		SCTP_DEC_STATS(SCTP_MIB_CURRESTAB);
+		SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS);
+		SCTP_DEC_STATS(net, SCTP_MIB_CURRESTAB);
 		return SCTP_DISPOSITION_ABORT;
 	}
 
@@ -5682,9 +5711,10 @@ sctp_disposition_t sctp_sf_t5_timer_expire(const struct sctp_endpoint *ep,
 					   sctp_cmd_seq_t *commands)
 {
 	struct sctp_chunk *reply = NULL;
+	struct net *net = sock_net(asoc->base.sk);
 
 	SCTP_DEBUG_PRINTK("Timer T5 expired.\n");
-	SCTP_INC_STATS(SCTP_MIB_T5_SHUTDOWN_GUARD_EXPIREDS);
+	SCTP_INC_STATS(net, SCTP_MIB_T5_SHUTDOWN_GUARD_EXPIREDS);
 
 	reply = sctp_make_abort(asoc, NULL, 0);
 	if (!reply)
@@ -5696,8 +5726,8 @@ sctp_disposition_t sctp_sf_t5_timer_expire(const struct sctp_endpoint *ep,
 	sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED,
 			SCTP_PERR(SCTP_ERROR_NO_ERROR));
 
-	SCTP_INC_STATS(SCTP_MIB_ABORTEDS);
-	SCTP_DEC_STATS(SCTP_MIB_CURRESTAB);
+	SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS);
+	SCTP_DEC_STATS(net, SCTP_MIB_CURRESTAB);
 
 	return SCTP_DISPOSITION_DELETE_TCB;
 nomem:
@@ -5716,9 +5746,10 @@ sctp_disposition_t sctp_sf_autoclose_timer_expire(
 	void *arg,
 	sctp_cmd_seq_t *commands)
 {
+	struct net *net = sock_net(asoc->base.sk);
 	int disposition;
 
-	SCTP_INC_STATS(SCTP_MIB_AUTOCLOSE_EXPIREDS);
+	SCTP_INC_STATS(net, SCTP_MIB_AUTOCLOSE_EXPIREDS);
 
 	/* From 9.2 Shutdown of an Association
 	 * Upon receipt of the SHUTDOWN primitive from its upper
@@ -5976,7 +6007,7 @@ static void sctp_send_stale_cookie_err(const struct sctp_endpoint *ep,
 			sctp_packet_append_chunk(packet, err_chunk);
 			sctp_add_cmd_sf(commands, SCTP_CMD_SEND_PKT,
 					SCTP_PACKET(packet));
-			SCTP_INC_STATS(SCTP_MIB_OUTCTRLCHUNKS);
+			SCTP_INC_STATS(net, SCTP_MIB_OUTCTRLCHUNKS);
 		} else
 			sctp_chunk_free (err_chunk);
 	}
@@ -5996,6 +6027,7 @@ static int sctp_eat_data(const struct sctp_association *asoc,
 	__u32 tsn;
 	struct sctp_tsnmap *map = (struct sctp_tsnmap *)&asoc->peer.tsn_map;
 	struct sock *sk = asoc->base.sk;
+	struct net *net;
 	u16 ssn;
 	u16 sid;
 	u8 ordered = 0;
@@ -6112,6 +6144,7 @@ static int sctp_eat_data(const struct sctp_association *asoc,
 	 * No User Data:  This error cause is returned to the originator of a
 	 * DATA chunk if a received DATA chunk has no user data.
 	 */
+	net = sock_net(sk);
 	if (unlikely(0 == datalen)) {
 		err = sctp_make_abort_no_data(asoc, chunk, tsn);
 		if (err) {
@@ -6126,8 +6159,8 @@ static int sctp_eat_data(const struct sctp_association *asoc,
 				SCTP_ERROR(ECONNABORTED));
 		sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED,
 				SCTP_PERR(SCTP_ERROR_NO_DATA));
-		SCTP_INC_STATS(SCTP_MIB_ABORTEDS);
-		SCTP_DEC_STATS(SCTP_MIB_CURRESTAB);
+		SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS);
+		SCTP_DEC_STATS(net, SCTP_MIB_CURRESTAB);
 		return SCTP_IERROR_NO_DATA;
 	}
 
@@ -6137,9 +6170,9 @@ static int sctp_eat_data(const struct sctp_association *asoc,
 	 * if we renege and the chunk arrives again.
 	 */
 	if (chunk->chunk_hdr->flags & SCTP_DATA_UNORDERED)
-		SCTP_INC_STATS(SCTP_MIB_INUNORDERCHUNKS);
+		SCTP_INC_STATS(net, SCTP_MIB_INUNORDERCHUNKS);
 	else {
-		SCTP_INC_STATS(SCTP_MIB_INORDERCHUNKS);
+		SCTP_INC_STATS(net, SCTP_MIB_INORDERCHUNKS);
 		ordered = 1;
 	}
 
diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c
index f5a6a4f4faf7..360d8697b95c 100644
--- a/net/sctp/ulpqueue.c
+++ b/net/sctp/ulpqueue.c
@@ -326,7 +326,9 @@ static void sctp_ulpq_store_reasm(struct sctp_ulpq *ulpq,
  * payload was fragmented on the way and ip had to reassemble them.
  * We add the rest of skb's to the first skb's fraglist.
  */
-static struct sctp_ulpevent *sctp_make_reassembled_event(struct sk_buff_head *queue, struct sk_buff *f_frag, struct sk_buff *l_frag)
+static struct sctp_ulpevent *sctp_make_reassembled_event(struct net *net,
+	struct sk_buff_head *queue, struct sk_buff *f_frag,
+	struct sk_buff *l_frag)
 {
 	struct sk_buff *pos;
 	struct sk_buff *new = NULL;
@@ -394,7 +396,7 @@ static struct sctp_ulpevent *sctp_make_reassembled_event(struct sk_buff_head *qu
 	}
 
 	event = sctp_skb2event(f_frag);
-	SCTP_INC_STATS(SCTP_MIB_REASMUSRMSGS);
+	SCTP_INC_STATS(net, SCTP_MIB_REASMUSRMSGS);
 
 	return event;
 }
@@ -493,7 +495,8 @@ static struct sctp_ulpevent *sctp_ulpq_retrieve_reassembled(struct sctp_ulpq *ul
 		cevent = sctp_skb2event(pd_first);
 		pd_point = sctp_sk(asoc->base.sk)->pd_point;
 		if (pd_point && pd_point <= pd_len) {
-			retval = sctp_make_reassembled_event(&ulpq->reasm,
+			retval = sctp_make_reassembled_event(sock_net(asoc->base.sk),
+							     &ulpq->reasm,
 							     pd_first,
 							     pd_last);
 			if (retval)
@@ -503,7 +506,8 @@ static struct sctp_ulpevent *sctp_ulpq_retrieve_reassembled(struct sctp_ulpq *ul
 done:
 	return retval;
 found:
-	retval = sctp_make_reassembled_event(&ulpq->reasm, first_frag, pos);
+	retval = sctp_make_reassembled_event(sock_net(ulpq->asoc->base.sk),
+					     &ulpq->reasm, first_frag, pos);
 	if (retval)
 		retval->msg_flags |= MSG_EOR;
 	goto done;
@@ -563,7 +567,8 @@ static struct sctp_ulpevent *sctp_ulpq_retrieve_partial(struct sctp_ulpq *ulpq)
 	 * further.
 	 */
 done:
-	retval = sctp_make_reassembled_event(&ulpq->reasm, first_frag, last_frag);
+	retval = sctp_make_reassembled_event(sock_net(ulpq->asoc->base.sk),
+					&ulpq->reasm, first_frag, last_frag);
 	if (retval && is_last)
 		retval->msg_flags |= MSG_EOR;
 
@@ -655,7 +660,8 @@ static struct sctp_ulpevent *sctp_ulpq_retrieve_first(struct sctp_ulpq *ulpq)
 	 * further.
 	 */
 done:
-	retval = sctp_make_reassembled_event(&ulpq->reasm, first_frag, last_frag);
+	retval = sctp_make_reassembled_event(sock_net(ulpq->asoc->base.sk),
+					&ulpq->reasm, first_frag, last_frag);
 	return retval;
 }
 
-- 
cgit v1.2.3


From ebb7e95d9351f77a8ec1fca20eb645051401b7b2 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Tue, 7 Aug 2012 07:23:59 +0000
Subject: sctp: Add infrastructure for per net sysctls

Start with an empty sctp_net_table that will be populated as the various
tunable sysctls are made per net.

Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
Acked-by: Vlad Yasevich <vyasevich@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netns/sctp.h |  6 +++++-
 include/net/sctp/sctp.h  |  4 ++++
 net/sctp/protocol.c      |  7 +++++++
 net/sctp/sysctl.c        | 21 +++++++++++++++++++++
 4 files changed, 37 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/netns/sctp.h b/include/net/netns/sctp.h
index 06ccddf9566c..9576b60cbd25 100644
--- a/include/net/netns/sctp.h
+++ b/include/net/netns/sctp.h
@@ -4,6 +4,7 @@
 struct sock;
 struct proc_dir_entry;
 struct sctp_mib;
+struct ctl_table_header;
 
 struct netns_sctp {
 	DEFINE_SNMP_STAT(struct sctp_mib, sctp_statistics);
@@ -11,7 +12,9 @@ struct netns_sctp {
 #ifdef CONFIG_PROC_FS
 	struct proc_dir_entry *proc_net_sctp;
 #endif
-
+#ifdef CONFIG_SYSCTL
+	struct ctl_table_header *sysctl_header;
+#endif
 	/* This is the global socket data structure used for responding to
 	 * the Out-of-the-blue (OOTB) packets.  A control sock will be created
 	 * for this socket at the initialization time.
@@ -32,6 +35,7 @@ struct netns_sctp {
 
 	/* Lock that protects the local_addr_list writers */
 	spinlock_t local_addr_lock;
+
 };
 
 #endif /* __NETNS_SCTP_H__ */
diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
index 7dcd4dfd7c3f..c07421d1772f 100644
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@ -375,9 +375,13 @@ static inline void sctp_dbg_objcnt_exit(struct net *) { return; }
 #if defined CONFIG_SYSCTL
 void sctp_sysctl_register(void);
 void sctp_sysctl_unregister(void);
+int sctp_sysctl_net_register(struct net *net);
+void sctp_sysctl_net_unregister(struct net *net);
 #else
 static inline void sctp_sysctl_register(void) { return; }
 static inline void sctp_sysctl_unregister(void) { return; }
+static inline int sctp_sysctl_net_register(struct net *net) { return 0; }
+static inline void sctp_sysctl_net_unregister(struct net *net) { return; }
 #endif
 
 /* Size of Supported Address Parameter for 'x' address types. */
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index d58db315db85..0f2342be61f3 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -1169,6 +1169,10 @@ static int sctp_net_init(struct net *net)
 {
 	int status;
 
+	status = sctp_sysctl_net_register(net);
+	if (status)
+		goto err_sysctl_register;
+
 	/* Allocate and initialise sctp mibs.  */
 	status = init_sctp_mibs(net);
 	if (status)
@@ -1208,6 +1212,8 @@ err_ctl_sock_init:
 err_init_proc:
 	cleanup_sctp_mibs(net);
 err_init_mibs:
+	sctp_sysctl_net_unregister(net);
+err_sysctl_register:
 	return status;
 }
 
@@ -1224,6 +1230,7 @@ static void sctp_net_exit(struct net *net)
 
 	sctp_proc_exit(net);
 	cleanup_sctp_mibs(net);
+	sctp_sysctl_net_unregister(net);
 }
 
 static struct pernet_operations sctp_net_ops = {
diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c
index 2b2bfe933ff1..bee36c408dde 100644
--- a/net/sctp/sysctl.c
+++ b/net/sctp/sysctl.c
@@ -284,6 +284,27 @@ static ctl_table sctp_table[] = {
 	{ /* sentinel */ }
 };
 
+static ctl_table sctp_net_table[] = {
+	{ /* sentinel */ }
+};
+
+int sctp_sysctl_net_register(struct net *net)
+{
+	struct ctl_table *table;
+
+	table = kmemdup(sctp_net_table, sizeof(sctp_net_table), GFP_KERNEL);
+	if (!table)
+		return -ENOMEM;
+
+	net->sctp.sysctl_header = register_net_sysctl(net, "net/sctp", table);
+	return 0;
+}
+
+void sctp_sysctl_net_unregister(struct net *net)
+{
+	unregister_net_sysctl_table(net->sctp.sysctl_header);
+}
+
 static struct ctl_table_header * sctp_sysctl_header;
 
 /* Sysctl registration.  */
-- 
cgit v1.2.3


From 55e26eb95a5345a5796babac98de6d6c42771df1 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Tue, 7 Aug 2012 07:25:24 +0000
Subject: sctp: Push struct net down to sctp_chunk_event_lookup

This trickles up through sctp_sm_lookup_event up to sctp_do_sm
and up further into sctp_primitiv_NAME before the code reaches
places where struct net can be reliably found.

Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/sctp.h  | 12 ++++++------
 include/net/sctp/sm.h    |  5 +++--
 net/sctp/associola.c     |  5 +++--
 net/sctp/endpointola.c   |  4 +++-
 net/sctp/input.c         |  4 +++-
 net/sctp/primitive.c     |  4 ++--
 net/sctp/sm_sideeffect.c | 24 ++++++++++++++++--------
 net/sctp/sm_statetable.c | 11 +++++++----
 net/sctp/socket.c        | 27 +++++++++++++++++----------
 9 files changed, 60 insertions(+), 36 deletions(-)

(limited to 'include')

diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
index c07421d1772f..aaa82923bbaa 100644
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@ -139,12 +139,12 @@ extern int sctp_asconf_mgmt(struct sctp_sock *, struct sctp_sockaddr_entry *);
 /*
  * sctp/primitive.c
  */
-int sctp_primitive_ASSOCIATE(struct sctp_association *, void *arg);
-int sctp_primitive_SHUTDOWN(struct sctp_association *, void *arg);
-int sctp_primitive_ABORT(struct sctp_association *, void *arg);
-int sctp_primitive_SEND(struct sctp_association *, void *arg);
-int sctp_primitive_REQUESTHEARTBEAT(struct sctp_association *, void *arg);
-int sctp_primitive_ASCONF(struct sctp_association *, void *arg);
+int sctp_primitive_ASSOCIATE(struct net *, struct sctp_association *, void *arg);
+int sctp_primitive_SHUTDOWN(struct net *, struct sctp_association *, void *arg);
+int sctp_primitive_ABORT(struct net *, struct sctp_association *, void *arg);
+int sctp_primitive_SEND(struct net *, struct sctp_association *, void *arg);
+int sctp_primitive_REQUESTHEARTBEAT(struct net *, struct sctp_association *, void *arg);
+int sctp_primitive_ASCONF(struct net *, struct sctp_association *, void *arg);
 
 /*
  * sctp/input.c
diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h
index 9148632b8204..bcef13023ac3 100644
--- a/include/net/sctp/sm.h
+++ b/include/net/sctp/sm.h
@@ -178,7 +178,8 @@ sctp_state_fn_t sctp_sf_autoclose_timer_expire;
 
 /* Prototypes for utility support functions.  */
 __u8 sctp_get_chunk_type(struct sctp_chunk *chunk);
-const sctp_sm_table_entry_t *sctp_sm_lookup_event(sctp_event_t,
+const sctp_sm_table_entry_t *sctp_sm_lookup_event(struct net *,
+					    sctp_event_t,
 					    sctp_state_t,
 					    sctp_subtype_t);
 int sctp_chunk_iif(const struct sctp_chunk *);
@@ -268,7 +269,7 @@ void sctp_chunk_assign_ssn(struct sctp_chunk *);
 
 /* Prototypes for statetable processing. */
 
-int sctp_do_sm(sctp_event_t event_type, sctp_subtype_t subtype,
+int sctp_do_sm(struct net *net, sctp_event_t event_type, sctp_subtype_t subtype,
 	       sctp_state_t state,
                struct sctp_endpoint *,
                struct sctp_association *asoc,
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index 8a1f27a7a001..6bcbecafe393 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -1118,6 +1118,7 @@ static void sctp_assoc_bh_rcv(struct work_struct *work)
 	struct sctp_association *asoc =
 		container_of(work, struct sctp_association,
 			     base.inqueue.immediate);
+	struct net *net = sock_net(asoc->base.sk);
 	struct sctp_endpoint *ep;
 	struct sctp_chunk *chunk;
 	struct sctp_inq *inqueue;
@@ -1150,13 +1151,13 @@ static void sctp_assoc_bh_rcv(struct work_struct *work)
 		if (sctp_chunk_is_data(chunk))
 			asoc->peer.last_data_from = chunk->transport;
 		else
-			SCTP_INC_STATS(sock_net(asoc->base.sk), SCTP_MIB_INCTRLCHUNKS);
+			SCTP_INC_STATS(net, SCTP_MIB_INCTRLCHUNKS);
 
 		if (chunk->transport)
 			chunk->transport->last_time_heard = jiffies;
 
 		/* Run through the state machine. */
-		error = sctp_do_sm(SCTP_EVENT_T_CHUNK, subtype,
+		error = sctp_do_sm(net, SCTP_EVENT_T_CHUNK, subtype,
 				   state, ep, asoc, chunk, GFP_ATOMIC);
 
 		/* Check to see if the association is freed in response to
diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c
index 3edca80ab3a1..8315792ef2ba 100644
--- a/net/sctp/endpointola.c
+++ b/net/sctp/endpointola.c
@@ -413,6 +413,7 @@ static void sctp_endpoint_bh_rcv(struct work_struct *work)
 			     base.inqueue.immediate);
 	struct sctp_association *asoc;
 	struct sock *sk;
+	struct net *net;
 	struct sctp_transport *transport;
 	struct sctp_chunk *chunk;
 	struct sctp_inq *inqueue;
@@ -427,6 +428,7 @@ static void sctp_endpoint_bh_rcv(struct work_struct *work)
 	asoc = NULL;
 	inqueue = &ep->base.inqueue;
 	sk = ep->base.sk;
+	net = sock_net(sk);
 
 	while (NULL != (chunk = sctp_inq_pop(inqueue))) {
 		subtype = SCTP_ST_CHUNK(chunk->chunk_hdr->type);
@@ -483,7 +485,7 @@ normal:
 		if (chunk->transport)
 			chunk->transport->last_time_heard = jiffies;
 
-		error = sctp_do_sm(SCTP_EVENT_T_CHUNK, subtype, state,
+		error = sctp_do_sm(net, SCTP_EVENT_T_CHUNK, subtype, state,
 				   ep, asoc, chunk, GFP_ATOMIC);
 
 		if (error && chunk)
diff --git a/net/sctp/input.c b/net/sctp/input.c
index 530830151f04..a2ceb70ee06c 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -466,11 +466,13 @@ void sctp_icmp_proto_unreachable(struct sock *sk,
 		}
 			
 	} else {
+		struct net *net = sock_net(sk);
+
 		if (timer_pending(&t->proto_unreach_timer) &&
 		    del_timer(&t->proto_unreach_timer))
 			sctp_association_put(asoc);
 
-		sctp_do_sm(SCTP_EVENT_T_OTHER,
+		sctp_do_sm(net, SCTP_EVENT_T_OTHER,
 			   SCTP_ST_OTHER(SCTP_EVENT_ICMP_PROTO_UNREACH),
 			   asoc->state, asoc->ep, asoc, t,
 			   GFP_ATOMIC);
diff --git a/net/sctp/primitive.c b/net/sctp/primitive.c
index 534c7eae9d15..794bb14decde 100644
--- a/net/sctp/primitive.c
+++ b/net/sctp/primitive.c
@@ -57,7 +57,7 @@
 
 #define DECLARE_PRIMITIVE(name) \
 /* This is called in the code as sctp_primitive_ ## name.  */ \
-int sctp_primitive_ ## name(struct sctp_association *asoc, \
+int sctp_primitive_ ## name(struct net *net, struct sctp_association *asoc, \
 			    void *arg) { \
 	int error = 0; \
 	sctp_event_t event_type; sctp_subtype_t subtype; \
@@ -69,7 +69,7 @@ int sctp_primitive_ ## name(struct sctp_association *asoc, \
 	state = asoc ? asoc->state : SCTP_STATE_CLOSED; \
 	ep = asoc ? asoc->ep : NULL; \
 	\
-	error = sctp_do_sm(event_type, subtype, state, ep, asoc, \
+	error = sctp_do_sm(net, event_type, subtype, state, ep, asoc,	\
 			   arg, GFP_KERNEL); \
 	return error; \
 }
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index fe99628e1257..02c4c1c066a7 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -251,6 +251,7 @@ void sctp_generate_t3_rtx_event(unsigned long peer)
 	int error;
 	struct sctp_transport *transport = (struct sctp_transport *) peer;
 	struct sctp_association *asoc = transport->asoc;
+	struct net *net = sock_net(asoc->base.sk);
 
 	/* Check whether a task is in the sock.  */
 
@@ -271,7 +272,7 @@ void sctp_generate_t3_rtx_event(unsigned long peer)
 		goto out_unlock;
 
 	/* Run through the state machine.  */
-	error = sctp_do_sm(SCTP_EVENT_T_TIMEOUT,
+	error = sctp_do_sm(net, SCTP_EVENT_T_TIMEOUT,
 			   SCTP_ST_TIMEOUT(SCTP_EVENT_TIMEOUT_T3_RTX),
 			   asoc->state,
 			   asoc->ep, asoc,
@@ -291,6 +292,7 @@ out_unlock:
 static void sctp_generate_timeout_event(struct sctp_association *asoc,
 					sctp_event_timeout_t timeout_type)
 {
+	struct net *net = sock_net(asoc->base.sk);
 	int error = 0;
 
 	sctp_bh_lock_sock(asoc->base.sk);
@@ -312,7 +314,7 @@ static void sctp_generate_timeout_event(struct sctp_association *asoc,
 		goto out_unlock;
 
 	/* Run through the state machine.  */
-	error = sctp_do_sm(SCTP_EVENT_T_TIMEOUT,
+	error = sctp_do_sm(net, SCTP_EVENT_T_TIMEOUT,
 			   SCTP_ST_TIMEOUT(timeout_type),
 			   asoc->state, asoc->ep, asoc,
 			   (void *)timeout_type, GFP_ATOMIC);
@@ -371,6 +373,7 @@ void sctp_generate_heartbeat_event(unsigned long data)
 	int error = 0;
 	struct sctp_transport *transport = (struct sctp_transport *) data;
 	struct sctp_association *asoc = transport->asoc;
+	struct net *net = sock_net(asoc->base.sk);
 
 	sctp_bh_lock_sock(asoc->base.sk);
 	if (sock_owned_by_user(asoc->base.sk)) {
@@ -388,7 +391,7 @@ void sctp_generate_heartbeat_event(unsigned long data)
 	if (transport->dead)
 		goto out_unlock;
 
-	error = sctp_do_sm(SCTP_EVENT_T_TIMEOUT,
+	error = sctp_do_sm(net, SCTP_EVENT_T_TIMEOUT,
 			   SCTP_ST_TIMEOUT(SCTP_EVENT_TIMEOUT_HEARTBEAT),
 			   asoc->state, asoc->ep, asoc,
 			   transport, GFP_ATOMIC);
@@ -408,6 +411,7 @@ void sctp_generate_proto_unreach_event(unsigned long data)
 {
 	struct sctp_transport *transport = (struct sctp_transport *) data;
 	struct sctp_association *asoc = transport->asoc;
+	struct net *net = sock_net(asoc->base.sk);
 	
 	sctp_bh_lock_sock(asoc->base.sk);
 	if (sock_owned_by_user(asoc->base.sk)) {
@@ -426,7 +430,7 @@ void sctp_generate_proto_unreach_event(unsigned long data)
 	if (asoc->base.dead)
 		goto out_unlock;
 
-	sctp_do_sm(SCTP_EVENT_T_OTHER,
+	sctp_do_sm(net, SCTP_EVENT_T_OTHER,
 		   SCTP_ST_OTHER(SCTP_EVENT_ICMP_PROTO_UNREACH),
 		   asoc->state, asoc->ep, asoc, transport, GFP_ATOMIC);
 
@@ -753,8 +757,10 @@ static int sctp_cmd_process_sack(sctp_cmd_seq_t *cmds,
 	int err = 0;
 
 	if (sctp_outq_sack(&asoc->outqueue, sackh)) {
+		struct net *net = sock_net(asoc->base.sk);
+
 		/* There are no more TSNs awaiting SACK.  */
-		err = sctp_do_sm(SCTP_EVENT_T_OTHER,
+		err = sctp_do_sm(net, SCTP_EVENT_T_OTHER,
 				 SCTP_ST_OTHER(SCTP_EVENT_NO_PENDING_TSN),
 				 asoc->state, asoc->ep, asoc, NULL,
 				 GFP_ATOMIC);
@@ -1042,6 +1048,8 @@ static int sctp_cmd_send_msg(struct sctp_association *asoc,
  */
 static void sctp_cmd_send_asconf(struct sctp_association *asoc)
 {
+	struct net *net = sock_net(asoc->base.sk);
+
 	/* Send the next asconf chunk from the addip chunk
 	 * queue.
 	 */
@@ -1053,7 +1061,7 @@ static void sctp_cmd_send_asconf(struct sctp_association *asoc)
 
 		/* Hold the chunk until an ASCONF_ACK is received. */
 		sctp_chunk_hold(asconf);
-		if (sctp_primitive_ASCONF(asoc, asconf))
+		if (sctp_primitive_ASCONF(net, asoc, asconf))
 			sctp_chunk_free(asconf);
 		else
 			asoc->addip_last_asconf = asconf;
@@ -1089,7 +1097,7 @@ static void sctp_cmd_send_asconf(struct sctp_association *asoc)
  * If you want to understand all of lksctp, this is a
  * good place to start.
  */
-int sctp_do_sm(sctp_event_t event_type, sctp_subtype_t subtype,
+int sctp_do_sm(struct net *net, sctp_event_t event_type, sctp_subtype_t subtype,
 	       sctp_state_t state,
 	       struct sctp_endpoint *ep,
 	       struct sctp_association *asoc,
@@ -1110,7 +1118,7 @@ int sctp_do_sm(sctp_event_t event_type, sctp_subtype_t subtype,
 	/* Look up the state function, run it, and then process the
 	 * side effects.  These three steps are the heart of lksctp.
 	 */
-	state_fn = sctp_sm_lookup_event(event_type, state, subtype);
+	state_fn = sctp_sm_lookup_event(net, event_type, state, subtype);
 
 	sctp_init_cmd_seq(&commands);
 
diff --git a/net/sctp/sm_statetable.c b/net/sctp/sm_statetable.c
index 7c211a7f90f4..4a029d798287 100644
--- a/net/sctp/sm_statetable.c
+++ b/net/sctp/sm_statetable.c
@@ -59,7 +59,8 @@ other_event_table[SCTP_NUM_OTHER_TYPES][SCTP_STATE_NUM_STATES];
 static const sctp_sm_table_entry_t
 timeout_event_table[SCTP_NUM_TIMEOUT_TYPES][SCTP_STATE_NUM_STATES];
 
-static const sctp_sm_table_entry_t *sctp_chunk_event_lookup(sctp_cid_t cid,
+static const sctp_sm_table_entry_t *sctp_chunk_event_lookup(struct net *net,
+							    sctp_cid_t cid,
 							    sctp_state_t state);
 
 
@@ -82,13 +83,14 @@ static const sctp_sm_table_entry_t bug = {
 	rtn;								\
 })
 
-const sctp_sm_table_entry_t *sctp_sm_lookup_event(sctp_event_t event_type,
+const sctp_sm_table_entry_t *sctp_sm_lookup_event(struct net *net,
+						  sctp_event_t event_type,
 						  sctp_state_t state,
 						  sctp_subtype_t event_subtype)
 {
 	switch (event_type) {
 	case SCTP_EVENT_T_CHUNK:
-		return sctp_chunk_event_lookup(event_subtype.chunk, state);
+		return sctp_chunk_event_lookup(net, event_subtype.chunk, state);
 	case SCTP_EVENT_T_TIMEOUT:
 		return DO_LOOKUP(SCTP_EVENT_TIMEOUT_MAX, timeout,
 				 timeout_event_table);
@@ -906,7 +908,8 @@ static const sctp_sm_table_entry_t timeout_event_table[SCTP_NUM_TIMEOUT_TYPES][S
 	TYPE_SCTP_EVENT_TIMEOUT_AUTOCLOSE,
 };
 
-static const sctp_sm_table_entry_t *sctp_chunk_event_lookup(sctp_cid_t cid,
+static const sctp_sm_table_entry_t *sctp_chunk_event_lookup(struct net *net,
+							    sctp_cid_t cid,
 							    sctp_state_t state)
 {
 	if (state > SCTP_STATE_MAX)
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 5b6dd0e3d1f6..a6a4226a922f 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -427,6 +427,7 @@ SCTP_STATIC int sctp_do_bind(struct sock *sk, union sctp_addr *addr, int len)
 static int sctp_send_asconf(struct sctp_association *asoc,
 			    struct sctp_chunk *chunk)
 {
+	struct net 	*net = sock_net(asoc->base.sk);
 	int		retval = 0;
 
 	/* If there is an outstanding ASCONF chunk, queue it for later
@@ -439,7 +440,7 @@ static int sctp_send_asconf(struct sctp_association *asoc,
 
 	/* Hold the chunk until an ASCONF_ACK is received. */
 	sctp_chunk_hold(chunk);
-	retval = sctp_primitive_ASCONF(asoc, chunk);
+	retval = sctp_primitive_ASCONF(net, asoc, chunk);
 	if (retval)
 		sctp_chunk_free(chunk);
 	else
@@ -1050,6 +1051,7 @@ static int __sctp_connect(struct sock* sk,
 			  int addrs_size,
 			  sctp_assoc_t *assoc_id)
 {
+	struct net *net = sock_net(sk);
 	struct sctp_sock *sp;
 	struct sctp_endpoint *ep;
 	struct sctp_association *asoc = NULL;
@@ -1200,7 +1202,7 @@ static int __sctp_connect(struct sock* sk,
 			goto out_free;
 	}
 
-	err = sctp_primitive_ASSOCIATE(asoc, NULL);
+	err = sctp_primitive_ASSOCIATE(net, asoc, NULL);
 	if (err < 0) {
 		goto out_free;
 	}
@@ -1458,6 +1460,7 @@ SCTP_STATIC int sctp_getsockopt_connectx3(struct sock* sk, int len,
  */
 SCTP_STATIC void sctp_close(struct sock *sk, long timeout)
 {
+	struct net *net = sock_net(sk);
 	struct sctp_endpoint *ep;
 	struct sctp_association *asoc;
 	struct list_head *pos, *temp;
@@ -1499,9 +1502,9 @@ SCTP_STATIC void sctp_close(struct sock *sk, long timeout)
 
 			chunk = sctp_make_abort_user(asoc, NULL, 0);
 			if (chunk)
-				sctp_primitive_ABORT(asoc, chunk);
+				sctp_primitive_ABORT(net, asoc, chunk);
 		} else
-			sctp_primitive_SHUTDOWN(asoc, NULL);
+			sctp_primitive_SHUTDOWN(net, asoc, NULL);
 	}
 
 	/* On a TCP-style socket, block for at most linger_time if set. */
@@ -1569,6 +1572,7 @@ SCTP_STATIC int sctp_msghdr_parse(const struct msghdr *, sctp_cmsgs_t *);
 SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk,
 			     struct msghdr *msg, size_t msg_len)
 {
+	struct net *net = sock_net(sk);
 	struct sctp_sock *sp;
 	struct sctp_endpoint *ep;
 	struct sctp_association *new_asoc=NULL, *asoc=NULL;
@@ -1714,7 +1718,7 @@ SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk,
 		if (sinfo_flags & SCTP_EOF) {
 			SCTP_DEBUG_PRINTK("Shutting down association: %p\n",
 					  asoc);
-			sctp_primitive_SHUTDOWN(asoc, NULL);
+			sctp_primitive_SHUTDOWN(net, asoc, NULL);
 			err = 0;
 			goto out_unlock;
 		}
@@ -1727,7 +1731,7 @@ SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk,
 			}
 
 			SCTP_DEBUG_PRINTK("Aborting association: %p\n", asoc);
-			sctp_primitive_ABORT(asoc, chunk);
+			sctp_primitive_ABORT(net, asoc, chunk);
 			err = 0;
 			goto out_unlock;
 		}
@@ -1900,7 +1904,7 @@ SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk,
 
 	/* Auto-connect, if we aren't connected already. */
 	if (sctp_state(asoc, CLOSED)) {
-		err = sctp_primitive_ASSOCIATE(asoc, NULL);
+		err = sctp_primitive_ASSOCIATE(net, asoc, NULL);
 		if (err < 0)
 			goto out_free;
 		SCTP_DEBUG_PRINTK("We associated primitively.\n");
@@ -1928,7 +1932,7 @@ SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk,
 	 * works that way today.  Keep it that way or this
 	 * breaks.
 	 */
-	err = sctp_primitive_SEND(asoc, datamsg);
+	err = sctp_primitive_SEND(net, asoc, datamsg);
 	/* Did the lower layer accept the chunk? */
 	if (err)
 		sctp_datamsg_free(datamsg);
@@ -2320,7 +2324,9 @@ static int sctp_apply_peer_addr_params(struct sctp_paddrparams *params,
 	int error;
 
 	if (params->spp_flags & SPP_HB_DEMAND && trans) {
-		error = sctp_primitive_REQUESTHEARTBEAT (trans->asoc, trans);
+		struct net *net = sock_net(trans->asoc->base.sk);
+
+		error = sctp_primitive_REQUESTHEARTBEAT(net, trans->asoc, trans);
 		if (error)
 			return error;
 	}
@@ -4011,6 +4017,7 @@ SCTP_STATIC void sctp_destroy_sock(struct sock *sk)
  */
 SCTP_STATIC void sctp_shutdown(struct sock *sk, int how)
 {
+	struct net *net = sock_net(sk);
 	struct sctp_endpoint *ep;
 	struct sctp_association *asoc;
 
@@ -4022,7 +4029,7 @@ SCTP_STATIC void sctp_shutdown(struct sock *sk, int how)
 		if (!list_empty(&ep->asocs)) {
 			asoc = list_entry(ep->asocs.next,
 					  struct sctp_association, asocs);
-			sctp_primitive_SHUTDOWN(asoc, NULL);
+			sctp_primitive_SHUTDOWN(net, asoc, NULL);
 		}
 	}
 }
-- 
cgit v1.2.3


From 89bf3450cb9b041b1bb4bcc5e7cbdeab4545b1c1 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Tue, 7 Aug 2012 07:26:14 +0000
Subject: sctp: Push struct net down into sctp_transport_init

Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/structs.h | 2 +-
 net/sctp/associola.c       | 3 ++-
 net/sctp/sm_statefuns.c    | 2 +-
 net/sctp/transport.c       | 8 +++++---
 4 files changed, 9 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 6bdfcabe560e..88d217941579 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -1066,7 +1066,7 @@ struct sctp_transport {
 	__u64 hb_nonce;
 };
 
-struct sctp_transport *sctp_transport_new(const union sctp_addr *,
+struct sctp_transport *sctp_transport_new(struct net *, const union sctp_addr *,
 					  gfp_t);
 void sctp_transport_set_owner(struct sctp_transport *,
 			      struct sctp_association *);
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index 6bcbecafe393..93a4513c85e0 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -641,6 +641,7 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc,
 					   const gfp_t gfp,
 					   const int peer_state)
 {
+	struct net *net = sock_net(asoc->base.sk);
 	struct sctp_transport *peer;
 	struct sctp_sock *sp;
 	unsigned short port;
@@ -674,7 +675,7 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc,
 		return peer;
 	}
 
-	peer = sctp_transport_new(addr, gfp);
+	peer = sctp_transport_new(net, addr, gfp);
 	if (!peer)
 		return NULL;
 
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index bee5e2c288d8..ff2530c848b0 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -5958,7 +5958,7 @@ static struct sctp_packet *sctp_ootb_pkt_new(struct net *net,
 	}
 
 	/* Make a transport for the bucket, Eliza... */
-	transport = sctp_transport_new(sctp_source(chunk), GFP_ATOMIC);
+	transport = sctp_transport_new(net, sctp_source(chunk), GFP_ATOMIC);
 	if (!transport)
 		goto nomem;
 
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index c97472b248a2..aada963c9d6b 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -59,7 +59,8 @@
 /* 1st Level Abstractions.  */
 
 /* Initialize a new transport from provided memory.  */
-static struct sctp_transport *sctp_transport_init(struct sctp_transport *peer,
+static struct sctp_transport *sctp_transport_init(struct net *net,
+						  struct sctp_transport *peer,
 						  const union sctp_addr *addr,
 						  gfp_t gfp)
 {
@@ -109,7 +110,8 @@ static struct sctp_transport *sctp_transport_init(struct sctp_transport *peer,
 }
 
 /* Allocate and initialize a new transport.  */
-struct sctp_transport *sctp_transport_new(const union sctp_addr *addr,
+struct sctp_transport *sctp_transport_new(struct net *net,
+					  const union sctp_addr *addr,
 					  gfp_t gfp)
 {
 	struct sctp_transport *transport;
@@ -118,7 +120,7 @@ struct sctp_transport *sctp_transport_new(const union sctp_addr *addr,
 	if (!transport)
 		goto fail;
 
-	if (!sctp_transport_init(transport, addr, gfp))
+	if (!sctp_transport_init(net, transport, addr, gfp))
 		goto fail_init;
 
 	transport->malloced = 1;
-- 
cgit v1.2.3


From e7ff4a7037e6908b7a5f4682945a0b097d5b3535 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Tue, 7 Aug 2012 07:27:02 +0000
Subject: sctp: Push struct net down into sctp_in_scope

struct net will be needed shortly when the tunables are made per network
namespace.

Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/structs.h | 2 +-
 net/sctp/bind_addr.c       | 4 ++--
 net/sctp/protocol.c        | 2 +-
 net/sctp/sm_make_chunk.c   | 3 ++-
 4 files changed, 6 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 88d217941579..b0882f3a3a51 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -1248,7 +1248,7 @@ int sctp_raw_to_bind_addrs(struct sctp_bind_addr *bp, __u8 *raw, int len,
 			   __u16 port, gfp_t gfp);
 
 sctp_scope_t sctp_scope(const union sctp_addr *);
-int sctp_in_scope(const union sctp_addr *addr, const sctp_scope_t scope);
+int sctp_in_scope(struct net *net, const union sctp_addr *addr, const sctp_scope_t scope);
 int sctp_is_any(struct sock *sk, const union sctp_addr *addr);
 int sctp_addr_is_valid(const union sctp_addr *addr);
 int sctp_is_ep_boundall(struct sock *sk);
diff --git a/net/sctp/bind_addr.c b/net/sctp/bind_addr.c
index a85ce4b3e574..23389ba44e39 100644
--- a/net/sctp/bind_addr.c
+++ b/net/sctp/bind_addr.c
@@ -457,7 +457,7 @@ static int sctp_copy_one_addr(struct net *net, struct sctp_bind_addr *dest,
 
 	if (sctp_is_any(NULL, addr)) {
 		error = sctp_copy_local_addr_list(net, dest, scope, gfp, flags);
-	} else if (sctp_in_scope(addr, scope)) {
+	} else if (sctp_in_scope(net, addr, scope)) {
 		/* Now that the address is in scope, check to see if
 		 * the address type is supported by local sock as
 		 * well as the remote peer.
@@ -494,7 +494,7 @@ int sctp_is_any(struct sock *sk, const union sctp_addr *addr)
 }
 
 /* Is 'addr' valid for 'scope'?  */
-int sctp_in_scope(const union sctp_addr *addr, sctp_scope_t scope)
+int sctp_in_scope(struct net *net, const union sctp_addr *addr, sctp_scope_t scope)
 {
 	sctp_scope_t addr_scope = sctp_scope(addr);
 
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 0f2342be61f3..59965bdea07a 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -210,7 +210,7 @@ int sctp_copy_local_addr_list(struct net *net, struct sctp_bind_addr *bp,
 	list_for_each_entry_rcu(addr, &net->sctp.local_addr_list, list) {
 		if (!addr->valid)
 			continue;
-		if (sctp_in_scope(&addr->a, scope)) {
+		if (sctp_in_scope(net, &addr->a, scope)) {
 			/* Now that the address is in scope, check to see if
 			 * the address type is really supported by the local
 			 * sock as well as the remote peer.
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 479a70ef6ff8..fb12835e95c2 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -2466,6 +2466,7 @@ static int sctp_process_param(struct sctp_association *asoc,
 			      const union sctp_addr *peer_addr,
 			      gfp_t gfp)
 {
+	struct net *net = sock_net(asoc->base.sk);
 	union sctp_addr addr;
 	int i;
 	__u16 sat;
@@ -2494,7 +2495,7 @@ do_addr_param:
 		af = sctp_get_af_specific(param_type2af(param.p->type));
 		af->from_addr_param(&addr, param.addr, htons(asoc->peer.port), 0);
 		scope = sctp_scope(peer_addr);
-		if (sctp_in_scope(&addr, scope))
+		if (sctp_in_scope(net, &addr, scope))
 			if (!sctp_assoc_add_peer(asoc, &addr, gfp, SCTP_UNCONFIRMED))
 				return 0;
 		break;
-- 
cgit v1.2.3


From 24cb81a6a91288fcba19548944729ea906eb5e2a Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Tue, 7 Aug 2012 07:28:09 +0000
Subject: sctp: Push struct net down into all of the state machine functions

There are a handle of state machine functions primarily those dealing
with processing INIT packets where there is neither a valid endpoint nor
a valid assoication from which to derive a struct net.  Therefore add
struct net * to the parameter list of sctp_state_fn_t and update all of
the state machine functions.

Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/sm.h    |   3 +-
 net/sctp/sm_sideeffect.c |   2 +-
 net/sctp/sm_statefuns.c  | 619 ++++++++++++++++++++++++++---------------------
 3 files changed, 341 insertions(+), 283 deletions(-)

(limited to 'include')

diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h
index bcef13023ac3..b5887e1677e4 100644
--- a/include/net/sctp/sm.h
+++ b/include/net/sctp/sm.h
@@ -77,7 +77,8 @@ typedef struct {
 	int action;
 } sctp_sm_command_t;
 
-typedef sctp_disposition_t (sctp_state_fn_t) (const struct sctp_endpoint *,
+typedef sctp_disposition_t (sctp_state_fn_t) (struct net *,
+					      const struct sctp_endpoint *,
 					      const struct sctp_association *,
 					      const sctp_subtype_t type,
 					      void *arg,
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index 02c4c1c066a7..bcfebb91559d 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -1123,7 +1123,7 @@ int sctp_do_sm(struct net *net, sctp_event_t event_type, sctp_subtype_t subtype,
 	sctp_init_cmd_seq(&commands);
 
 	DEBUG_PRE;
-	status = (*state_fn->fn)(ep, asoc, subtype, event_arg, &commands);
+	status = (*state_fn->fn)(net, ep, asoc, subtype, event_arg, &commands);
 	DEBUG_POST;
 
 	error = sctp_side_effects(event_type, subtype, state,
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index ff2530c848b0..19f3bff84193 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -66,7 +66,8 @@
 #include <net/sctp/sm.h>
 #include <net/sctp/structs.h>
 
-static struct sctp_packet *sctp_abort_pkt_new(const struct sctp_endpoint *ep,
+static struct sctp_packet *sctp_abort_pkt_new(struct net *net,
+				  const struct sctp_endpoint *ep,
 				  const struct sctp_association *asoc,
 				  struct sctp_chunk *chunk,
 				  const void *payload,
@@ -77,34 +78,40 @@ static int sctp_eat_data(const struct sctp_association *asoc,
 static struct sctp_packet *sctp_ootb_pkt_new(struct net *net,
 					     const struct sctp_association *asoc,
 					     const struct sctp_chunk *chunk);
-static void sctp_send_stale_cookie_err(const struct sctp_endpoint *ep,
+static void sctp_send_stale_cookie_err(struct net *net,
+				       const struct sctp_endpoint *ep,
 				       const struct sctp_association *asoc,
 				       const struct sctp_chunk *chunk,
 				       sctp_cmd_seq_t *commands,
 				       struct sctp_chunk *err_chunk);
-static sctp_disposition_t sctp_sf_do_5_2_6_stale(const struct sctp_endpoint *ep,
+static sctp_disposition_t sctp_sf_do_5_2_6_stale(struct net *net,
+						 const struct sctp_endpoint *ep,
 						 const struct sctp_association *asoc,
 						 const sctp_subtype_t type,
 						 void *arg,
 						 sctp_cmd_seq_t *commands);
-static sctp_disposition_t sctp_sf_shut_8_4_5(const struct sctp_endpoint *ep,
+static sctp_disposition_t sctp_sf_shut_8_4_5(struct net *net,
+					     const struct sctp_endpoint *ep,
 					     const struct sctp_association *asoc,
 					     const sctp_subtype_t type,
 					     void *arg,
 					     sctp_cmd_seq_t *commands);
-static sctp_disposition_t sctp_sf_tabort_8_4_8(const struct sctp_endpoint *ep,
+static sctp_disposition_t sctp_sf_tabort_8_4_8(struct net *net,
+					const struct sctp_endpoint *ep,
 					const struct sctp_association *asoc,
 					const sctp_subtype_t type,
 					void *arg,
 					sctp_cmd_seq_t *commands);
 static struct sctp_sackhdr *sctp_sm_pull_sack(struct sctp_chunk *chunk);
 
-static sctp_disposition_t sctp_stop_t1_and_abort(sctp_cmd_seq_t *commands,
+static sctp_disposition_t sctp_stop_t1_and_abort(struct net *net,
+					   sctp_cmd_seq_t *commands,
 					   __be16 error, int sk_err,
 					   const struct sctp_association *asoc,
 					   struct sctp_transport *transport);
 
 static sctp_disposition_t sctp_sf_abort_violation(
+				     struct net *net,
 				     const struct sctp_endpoint *ep,
 				     const struct sctp_association *asoc,
 				     void *arg,
@@ -113,6 +120,7 @@ static sctp_disposition_t sctp_sf_abort_violation(
 				     const size_t paylen);
 
 static sctp_disposition_t sctp_sf_violation_chunklen(
+				     struct net *net,
 				     const struct sctp_endpoint *ep,
 				     const struct sctp_association *asoc,
 				     const sctp_subtype_t type,
@@ -120,6 +128,7 @@ static sctp_disposition_t sctp_sf_violation_chunklen(
 				     sctp_cmd_seq_t *commands);
 
 static sctp_disposition_t sctp_sf_violation_paramlen(
+				     struct net *net,
 				     const struct sctp_endpoint *ep,
 				     const struct sctp_association *asoc,
 				     const sctp_subtype_t type,
@@ -127,6 +136,7 @@ static sctp_disposition_t sctp_sf_violation_paramlen(
 				     sctp_cmd_seq_t *commands);
 
 static sctp_disposition_t sctp_sf_violation_ctsn(
+				     struct net *net,
 				     const struct sctp_endpoint *ep,
 				     const struct sctp_association *asoc,
 				     const sctp_subtype_t type,
@@ -134,18 +144,21 @@ static sctp_disposition_t sctp_sf_violation_ctsn(
 				     sctp_cmd_seq_t *commands);
 
 static sctp_disposition_t sctp_sf_violation_chunk(
+				     struct net *net,
 				     const struct sctp_endpoint *ep,
 				     const struct sctp_association *asoc,
 				     const sctp_subtype_t type,
 				     void *arg,
 				     sctp_cmd_seq_t *commands);
 
-static sctp_ierror_t sctp_sf_authenticate(const struct sctp_endpoint *ep,
+static sctp_ierror_t sctp_sf_authenticate(struct net *net,
+				    const struct sctp_endpoint *ep,
 				    const struct sctp_association *asoc,
 				    const sctp_subtype_t type,
 				    struct sctp_chunk *chunk);
 
-static sctp_disposition_t __sctp_sf_do_9_1_abort(const struct sctp_endpoint *ep,
+static sctp_disposition_t __sctp_sf_do_9_1_abort(struct net *net,
+					const struct sctp_endpoint *ep,
 					const struct sctp_association *asoc,
 					const sctp_subtype_t type,
 					void *arg,
@@ -205,7 +218,8 @@ sctp_chunk_length_valid(struct sctp_chunk *chunk,
  *
  * The return value is the disposition of the chunk.
  */
-sctp_disposition_t sctp_sf_do_4_C(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_do_4_C(struct net *net,
+				  const struct sctp_endpoint *ep,
 				  const struct sctp_association *asoc,
 				  const sctp_subtype_t type,
 				  void *arg,
@@ -213,10 +227,9 @@ sctp_disposition_t sctp_sf_do_4_C(const struct sctp_endpoint *ep,
 {
 	struct sctp_chunk *chunk = arg;
 	struct sctp_ulpevent *ev;
-	struct net *net;
 
 	if (!sctp_vtag_verify_either(chunk, asoc))
-		return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+		return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
 
 	/* RFC 2960 6.10 Bundling
 	 *
@@ -224,11 +237,11 @@ sctp_disposition_t sctp_sf_do_4_C(const struct sctp_endpoint *ep,
 	 * SHUTDOWN COMPLETE with any other chunks.
 	 */
 	if (!chunk->singleton)
-		return sctp_sf_violation_chunk(ep, asoc, type, arg, commands);
+		return sctp_sf_violation_chunk(net, ep, asoc, type, arg, commands);
 
 	/* Make sure that the SHUTDOWN_COMPLETE chunk has a valid length. */
 	if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t)))
-		return sctp_sf_violation_chunklen(ep, asoc, type, arg,
+		return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
 						  commands);
 
 	/* RFC 2960 10.2 SCTP-to-ULP
@@ -261,7 +274,6 @@ sctp_disposition_t sctp_sf_do_4_C(const struct sctp_endpoint *ep,
 	sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE,
 			SCTP_STATE(SCTP_STATE_CLOSED));
 
-	net = sock_net(asoc->base.sk);
 	SCTP_INC_STATS(net, SCTP_MIB_SHUTDOWNS);
 	SCTP_DEC_STATS(net, SCTP_MIB_CURRESTAB);
 
@@ -292,7 +304,8 @@ sctp_disposition_t sctp_sf_do_4_C(const struct sctp_endpoint *ep,
  *
  * The return value is the disposition of the chunk.
  */
-sctp_disposition_t sctp_sf_do_5_1B_init(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_do_5_1B_init(struct net *net,
+					const struct sctp_endpoint *ep,
 					const struct sctp_association *asoc,
 					const sctp_subtype_t type,
 					void *arg,
@@ -304,7 +317,6 @@ sctp_disposition_t sctp_sf_do_5_1B_init(const struct sctp_endpoint *ep,
 	struct sctp_chunk *err_chunk;
 	struct sctp_packet *packet;
 	sctp_unrecognized_param_t *unk_param;
-	struct net *net;
 	int len;
 
 	/* 6.10 Bundling
@@ -317,22 +329,21 @@ sctp_disposition_t sctp_sf_do_5_1B_init(const struct sctp_endpoint *ep,
 	 * with an INIT chunk that is bundled with other chunks.
 	 */
 	if (!chunk->singleton)
-		return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+		return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
 
 	/* If the packet is an OOTB packet which is temporarily on the
 	 * control endpoint, respond with an ABORT.
 	 */
-	net = sock_net(ep->base.sk);
 	if (ep == sctp_sk(net->sctp.ctl_sock)->ep) {
 		SCTP_INC_STATS(net, SCTP_MIB_OUTOFBLUES);
-		return sctp_sf_tabort_8_4_8(ep, asoc, type, arg, commands);
+		return sctp_sf_tabort_8_4_8(net, ep, asoc, type, arg, commands);
 	}
 
 	/* 3.1 A packet containing an INIT chunk MUST have a zero Verification
 	 * Tag.
 	 */
 	if (chunk->sctp_hdr->vtag != 0)
-		return sctp_sf_tabort_8_4_8(ep, asoc, type, arg, commands);
+		return sctp_sf_tabort_8_4_8(net, ep, asoc, type, arg, commands);
 
 	/* Make sure that the INIT chunk has a valid length.
 	 * Normally, this would cause an ABORT with a Protocol Violation
@@ -340,7 +351,7 @@ sctp_disposition_t sctp_sf_do_5_1B_init(const struct sctp_endpoint *ep,
 	 * just discard the packet.
 	 */
 	if (!sctp_chunk_length_valid(chunk, sizeof(sctp_init_chunk_t)))
-		return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+		return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
 
 	/* If the INIT is coming toward a closing socket, we'll send back
 	 * and ABORT.  Essentially, this catches the race of INIT being
@@ -349,7 +360,7 @@ sctp_disposition_t sctp_sf_do_5_1B_init(const struct sctp_endpoint *ep,
 	 * can treat this OOTB
 	 */
 	if (sctp_sstate(ep->base.sk, CLOSING))
-		return sctp_sf_tabort_8_4_8(ep, asoc, type, arg, commands);
+		return sctp_sf_tabort_8_4_8(net, ep, asoc, type, arg, commands);
 
 	/* Verify the INIT chunk before processing it. */
 	err_chunk = NULL;
@@ -360,7 +371,7 @@ sctp_disposition_t sctp_sf_do_5_1B_init(const struct sctp_endpoint *ep,
 		 * Send an ABORT, with causes if there is any.
 		 */
 		if (err_chunk) {
-			packet = sctp_abort_pkt_new(ep, asoc, arg,
+			packet = sctp_abort_pkt_new(net, ep, asoc, arg,
 					(__u8 *)(err_chunk->chunk_hdr) +
 					sizeof(sctp_chunkhdr_t),
 					ntohs(err_chunk->chunk_hdr->length) -
@@ -377,7 +388,7 @@ sctp_disposition_t sctp_sf_do_5_1B_init(const struct sctp_endpoint *ep,
 				return SCTP_DISPOSITION_NOMEM;
 			}
 		} else {
-			return sctp_sf_tabort_8_4_8(ep, asoc, type, arg,
+			return sctp_sf_tabort_8_4_8(net, ep, asoc, type, arg,
 						    commands);
 		}
 	}
@@ -489,7 +500,8 @@ nomem:
  *
  * The return value is the disposition of the chunk.
  */
-sctp_disposition_t sctp_sf_do_5_1C_ack(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_do_5_1C_ack(struct net *net,
+				       const struct sctp_endpoint *ep,
 				       const struct sctp_association *asoc,
 				       const sctp_subtype_t type,
 				       void *arg,
@@ -501,18 +513,18 @@ sctp_disposition_t sctp_sf_do_5_1C_ack(const struct sctp_endpoint *ep,
 	struct sctp_packet *packet;
 
 	if (!sctp_vtag_verify(chunk, asoc))
-		return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+		return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
 
 	/* 6.10 Bundling
 	 * An endpoint MUST NOT bundle INIT, INIT ACK or
 	 * SHUTDOWN COMPLETE with any other chunks.
 	 */
 	if (!chunk->singleton)
-		return sctp_sf_violation_chunk(ep, asoc, type, arg, commands);
+		return sctp_sf_violation_chunk(net, ep, asoc, type, arg, commands);
 
 	/* Make sure that the INIT-ACK chunk has a valid length */
 	if (!sctp_chunk_length_valid(chunk, sizeof(sctp_initack_chunk_t)))
-		return sctp_sf_violation_chunklen(ep, asoc, type, arg,
+		return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
 						  commands);
 	/* Grab the INIT header.  */
 	chunk->subh.init_hdr = (sctp_inithdr_t *) chunk->skb->data;
@@ -531,7 +543,7 @@ sctp_disposition_t sctp_sf_do_5_1C_ack(const struct sctp_endpoint *ep,
 		 * the association.
 		 */
 		if (err_chunk) {
-			packet = sctp_abort_pkt_new(ep, asoc, arg,
+			packet = sctp_abort_pkt_new(net, ep, asoc, arg,
 					(__u8 *)(err_chunk->chunk_hdr) +
 					sizeof(sctp_chunkhdr_t),
 					ntohs(err_chunk->chunk_hdr->length) -
@@ -542,7 +554,7 @@ sctp_disposition_t sctp_sf_do_5_1C_ack(const struct sctp_endpoint *ep,
 			if (packet) {
 				sctp_add_cmd_sf(commands, SCTP_CMD_SEND_PKT,
 						SCTP_PACKET(packet));
-				SCTP_INC_STATS(sock_net(ep->base.sk), SCTP_MIB_OUTCTRLCHUNKS);
+				SCTP_INC_STATS(net, SCTP_MIB_OUTCTRLCHUNKS);
 				error = SCTP_ERROR_INV_PARAM;
 			}
 		}
@@ -559,10 +571,10 @@ sctp_disposition_t sctp_sf_do_5_1C_ack(const struct sctp_endpoint *ep,
 		 * was malformed.
 		 */
 		if (sctp_auth_recv_cid(SCTP_CID_ABORT, asoc))
-			return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+			return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
 
-		SCTP_INC_STATS(sock_net(ep->base.sk), SCTP_MIB_ABORTEDS);
-		return sctp_stop_t1_and_abort(commands, error, ECONNREFUSED,
+		SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS);
+		return sctp_stop_t1_and_abort(net, commands, error, ECONNREFUSED,
 						asoc, chunk->transport);
 	}
 
@@ -638,7 +650,8 @@ sctp_disposition_t sctp_sf_do_5_1C_ack(const struct sctp_endpoint *ep,
  *
  * The return value is the disposition of the chunk.
  */
-sctp_disposition_t sctp_sf_do_5_1D_ce(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_do_5_1D_ce(struct net *net,
+				      const struct sctp_endpoint *ep,
 				      const struct sctp_association *asoc,
 				      const sctp_subtype_t type, void *arg,
 				      sctp_cmd_seq_t *commands)
@@ -651,15 +664,13 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(const struct sctp_endpoint *ep,
 	int error = 0;
 	struct sctp_chunk *err_chk_p;
 	struct sock *sk;
-	struct net *net;
 
 	/* If the packet is an OOTB packet which is temporarily on the
 	 * control endpoint, respond with an ABORT.
 	 */
-	net = sock_net(ep->base.sk);
 	if (ep == sctp_sk(net->sctp.ctl_sock)->ep) {
 		SCTP_INC_STATS(net, SCTP_MIB_OUTOFBLUES);
-		return sctp_sf_tabort_8_4_8(ep, asoc, type, arg, commands);
+		return sctp_sf_tabort_8_4_8(net, ep, asoc, type, arg, commands);
 	}
 
 	/* Make sure that the COOKIE_ECHO chunk has a valid length.
@@ -668,7 +679,7 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(const struct sctp_endpoint *ep,
 	 * in sctp_unpack_cookie().
 	 */
 	if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t)))
-		return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+		return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
 
 	/* If the endpoint is not listening or if the number of associations
 	 * on the TCP-style socket exceed the max backlog, respond with an
@@ -677,7 +688,7 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(const struct sctp_endpoint *ep,
 	sk = ep->base.sk;
 	if (!sctp_sstate(sk, LISTENING) ||
 	    (sctp_style(sk, TCP) && sk_acceptq_is_full(sk)))
-		return sctp_sf_tabort_8_4_8(ep, asoc, type, arg, commands);
+		return sctp_sf_tabort_8_4_8(net, ep, asoc, type, arg, commands);
 
 	/* "Decode" the chunk.  We have no optional parameters so we
 	 * are in good shape.
@@ -710,13 +721,13 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(const struct sctp_endpoint *ep,
 			goto nomem;
 
 		case -SCTP_IERROR_STALE_COOKIE:
-			sctp_send_stale_cookie_err(ep, asoc, chunk, commands,
+			sctp_send_stale_cookie_err(net, ep, asoc, chunk, commands,
 						   err_chk_p);
-			return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+			return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
 
 		case -SCTP_IERROR_BAD_SIG:
 		default:
-			return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+			return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
 		}
 	}
 
@@ -763,14 +774,14 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(const struct sctp_endpoint *ep,
 		skb_pull(chunk->auth_chunk, sizeof(sctp_chunkhdr_t));
 		auth.transport = chunk->transport;
 
-		ret = sctp_sf_authenticate(ep, new_asoc, type, &auth);
+		ret = sctp_sf_authenticate(net, ep, new_asoc, type, &auth);
 
 		/* We can now safely free the auth_chunk clone */
 		kfree_skb(chunk->auth_chunk);
 
 		if (ret != SCTP_IERROR_NO_ERROR) {
 			sctp_association_free(new_asoc);
-			return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+			return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
 		}
 	}
 
@@ -863,23 +874,23 @@ nomem:
  *
  * The return value is the disposition of the chunk.
  */
-sctp_disposition_t sctp_sf_do_5_1E_ca(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_do_5_1E_ca(struct net *net,
+				      const struct sctp_endpoint *ep,
 				      const struct sctp_association *asoc,
 				      const sctp_subtype_t type, void *arg,
 				      sctp_cmd_seq_t *commands)
 {
 	struct sctp_chunk *chunk = arg;
 	struct sctp_ulpevent *ev;
-	struct net *net;
 
 	if (!sctp_vtag_verify(chunk, asoc))
-		return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+		return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
 
 	/* Verify that the chunk length for the COOKIE-ACK is OK.
 	 * If we don't do this, any bundled chunks may be junked.
 	 */
 	if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t)))
-		return sctp_sf_violation_chunklen(ep, asoc, type, arg,
+		return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
 						  commands);
 
 	/* Reset init error count upon receipt of COOKIE-ACK,
@@ -900,7 +911,6 @@ sctp_disposition_t sctp_sf_do_5_1E_ca(const struct sctp_endpoint *ep,
 			SCTP_TO(SCTP_EVENT_TIMEOUT_T1_COOKIE));
 	sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE,
 			SCTP_STATE(SCTP_STATE_ESTABLISHED));
-	net = sock_net(ep->base.sk);
 	SCTP_INC_STATS(net, SCTP_MIB_CURRESTAB);
 	SCTP_INC_STATS(net, SCTP_MIB_ACTIVEESTABS);
 	sctp_add_cmd_sf(commands, SCTP_CMD_HB_TIMERS_START, SCTP_NULL());
@@ -967,7 +977,8 @@ static sctp_disposition_t sctp_sf_heartbeat(const struct sctp_endpoint *ep,
 }
 
 /* Generate a HEARTBEAT packet on the given transport.  */
-sctp_disposition_t sctp_sf_sendbeat_8_3(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_sendbeat_8_3(struct net *net,
+					const struct sctp_endpoint *ep,
 					const struct sctp_association *asoc,
 					const sctp_subtype_t type,
 					void *arg,
@@ -976,13 +987,11 @@ sctp_disposition_t sctp_sf_sendbeat_8_3(const struct sctp_endpoint *ep,
 	struct sctp_transport *transport = (struct sctp_transport *) arg;
 
 	if (asoc->overall_error_count >= asoc->max_retrans) {
-		struct net *net;
 		sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR,
 				SCTP_ERROR(ETIMEDOUT));
 		/* CMD_ASSOC_FAILED calls CMD_DELETE_TCB. */
 		sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED,
 				SCTP_PERR(SCTP_ERROR_NO_ERROR));
-		net = sock_net(ep->base.sk);
 		SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS);
 		SCTP_DEC_STATS(net, SCTP_MIB_CURRESTAB);
 		return SCTP_DISPOSITION_DELETE_TCB;
@@ -1039,7 +1048,8 @@ sctp_disposition_t sctp_sf_sendbeat_8_3(const struct sctp_endpoint *ep,
  *
  * The return value is the disposition of the chunk.
  */
-sctp_disposition_t sctp_sf_beat_8_3(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_beat_8_3(struct net *net,
+				    const struct sctp_endpoint *ep,
 				    const struct sctp_association *asoc,
 				    const sctp_subtype_t type,
 				    void *arg,
@@ -1050,11 +1060,11 @@ sctp_disposition_t sctp_sf_beat_8_3(const struct sctp_endpoint *ep,
 	size_t paylen = 0;
 
 	if (!sctp_vtag_verify(chunk, asoc))
-		return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+		return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
 
 	/* Make sure that the HEARTBEAT chunk has a valid length. */
 	if (!sctp_chunk_length_valid(chunk, sizeof(sctp_heartbeat_chunk_t)))
-		return sctp_sf_violation_chunklen(ep, asoc, type, arg,
+		return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
 						  commands);
 
 	/* 8.3 The receiver of the HEARTBEAT should immediately
@@ -1106,7 +1116,8 @@ nomem:
  *
  * The return value is the disposition of the chunk.
  */
-sctp_disposition_t sctp_sf_backbeat_8_3(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_backbeat_8_3(struct net *net,
+					const struct sctp_endpoint *ep,
 					const struct sctp_association *asoc,
 					const sctp_subtype_t type,
 					void *arg,
@@ -1119,12 +1130,12 @@ sctp_disposition_t sctp_sf_backbeat_8_3(const struct sctp_endpoint *ep,
 	unsigned long max_interval;
 
 	if (!sctp_vtag_verify(chunk, asoc))
-		return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+		return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
 
 	/* Make sure that the HEARTBEAT-ACK chunk has a valid length.  */
 	if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t) +
 					    sizeof(sctp_sender_hb_info_t)))
-		return sctp_sf_violation_chunklen(ep, asoc, type, arg,
+		return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
 						  commands);
 
 	hbinfo = (sctp_sender_hb_info_t *) chunk->skb->data;
@@ -1213,7 +1224,7 @@ static int sctp_sf_send_restart_abort(struct net *net, union sctp_addr *ssa,
 	/* Association is NULL since this may be a restart attack and we
 	 * want to send back the attacker's vtag.
 	 */
-	pkt = sctp_abort_pkt_new(ep, NULL, init, errhdr, len);
+	pkt = sctp_abort_pkt_new(net, ep, NULL, init, errhdr, len);
 
 	if (!pkt)
 		goto out;
@@ -1370,6 +1381,7 @@ static char sctp_tietags_compare(struct sctp_association *new_asoc,
  * chunk handling.
  */
 static sctp_disposition_t sctp_sf_do_unexpected_init(
+	struct net *net,
 	const struct sctp_endpoint *ep,
 	const struct sctp_association *asoc,
 	const sctp_subtype_t type,
@@ -1394,20 +1406,20 @@ static sctp_disposition_t sctp_sf_do_unexpected_init(
 	 * with an INIT chunk that is bundled with other chunks.
 	 */
 	if (!chunk->singleton)
-		return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+		return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
 
 	/* 3.1 A packet containing an INIT chunk MUST have a zero Verification
 	 * Tag.
 	 */
 	if (chunk->sctp_hdr->vtag != 0)
-		return sctp_sf_tabort_8_4_8(ep, asoc, type, arg, commands);
+		return sctp_sf_tabort_8_4_8(net, ep, asoc, type, arg, commands);
 
 	/* Make sure that the INIT chunk has a valid length.
 	 * In this case, we generate a protocol violation since we have
 	 * an association established.
 	 */
 	if (!sctp_chunk_length_valid(chunk, sizeof(sctp_init_chunk_t)))
-		return sctp_sf_violation_chunklen(ep, asoc, type, arg,
+		return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
 						  commands);
 	/* Grab the INIT header.  */
 	chunk->subh.init_hdr = (sctp_inithdr_t *) chunk->skb->data;
@@ -1424,7 +1436,7 @@ static sctp_disposition_t sctp_sf_do_unexpected_init(
 		 * Send an ABORT, with causes if there is any.
 		 */
 		if (err_chunk) {
-			packet = sctp_abort_pkt_new(ep, asoc, arg,
+			packet = sctp_abort_pkt_new(net, ep, asoc, arg,
 					(__u8 *)(err_chunk->chunk_hdr) +
 					sizeof(sctp_chunkhdr_t),
 					ntohs(err_chunk->chunk_hdr->length) -
@@ -1433,14 +1445,14 @@ static sctp_disposition_t sctp_sf_do_unexpected_init(
 			if (packet) {
 				sctp_add_cmd_sf(commands, SCTP_CMD_SEND_PKT,
 						SCTP_PACKET(packet));
-				SCTP_INC_STATS(sock_net(ep->base.sk), SCTP_MIB_OUTCTRLCHUNKS);
+				SCTP_INC_STATS(net, SCTP_MIB_OUTCTRLCHUNKS);
 				retval = SCTP_DISPOSITION_CONSUME;
 			} else {
 				retval = SCTP_DISPOSITION_NOMEM;
 			}
 			goto cleanup;
 		} else {
-			return sctp_sf_tabort_8_4_8(ep, asoc, type, arg,
+			return sctp_sf_tabort_8_4_8(net, ep, asoc, type, arg,
 						    commands);
 		}
 	}
@@ -1582,7 +1594,8 @@ cleanup:
  *
  * The return value is the disposition of the chunk.
  */
-sctp_disposition_t sctp_sf_do_5_2_1_siminit(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_do_5_2_1_siminit(struct net *net,
+				    const struct sctp_endpoint *ep,
 				    const struct sctp_association *asoc,
 				    const sctp_subtype_t type,
 				    void *arg,
@@ -1591,7 +1604,7 @@ sctp_disposition_t sctp_sf_do_5_2_1_siminit(const struct sctp_endpoint *ep,
 	/* Call helper to do the real work for both simulataneous and
 	 * duplicate INIT chunk handling.
 	 */
-	return sctp_sf_do_unexpected_init(ep, asoc, type, arg, commands);
+	return sctp_sf_do_unexpected_init(net, ep, asoc, type, arg, commands);
 }
 
 /*
@@ -1635,7 +1648,8 @@ sctp_disposition_t sctp_sf_do_5_2_1_siminit(const struct sctp_endpoint *ep,
  *
  * The return value is the disposition of the chunk.
  */
-sctp_disposition_t sctp_sf_do_5_2_2_dupinit(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_do_5_2_2_dupinit(struct net *net,
+					const struct sctp_endpoint *ep,
 					const struct sctp_association *asoc,
 					const sctp_subtype_t type,
 					void *arg,
@@ -1644,7 +1658,7 @@ sctp_disposition_t sctp_sf_do_5_2_2_dupinit(const struct sctp_endpoint *ep,
 	/* Call helper to do the real work for both simulataneous and
 	 * duplicate INIT chunk handling.
 	 */
-	return sctp_sf_do_unexpected_init(ep, asoc, type, arg, commands);
+	return sctp_sf_do_unexpected_init(net, ep, asoc, type, arg, commands);
 }
 
 
@@ -1657,19 +1671,19 @@ sctp_disposition_t sctp_sf_do_5_2_2_dupinit(const struct sctp_endpoint *ep,
  * An unexpected INIT ACK usually indicates the processing of an old or
  * duplicated INIT chunk.
 */
-sctp_disposition_t sctp_sf_do_5_2_3_initack(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_do_5_2_3_initack(struct net *net,
+					    const struct sctp_endpoint *ep,
 					    const struct sctp_association *asoc,
 					    const sctp_subtype_t type,
 					    void *arg, sctp_cmd_seq_t *commands)
 {
-	struct net *net = sock_net(ep->base.sk);
 	/* Per the above section, we'll discard the chunk if we have an
 	 * endpoint.  If this is an OOTB INIT-ACK, treat it as such.
 	 */
 	if (ep == sctp_sk(net->sctp.ctl_sock)->ep)
-		return sctp_sf_ootb(ep, asoc, type, arg, commands);
+		return sctp_sf_ootb(net, ep, asoc, type, arg, commands);
 	else
-		return sctp_sf_discard_chunk(ep, asoc, type, arg, commands);
+		return sctp_sf_discard_chunk(net, ep, asoc, type, arg, commands);
 }
 
 /* Unexpected COOKIE-ECHO handler for peer restart (Table 2, action 'A')
@@ -1677,7 +1691,8 @@ sctp_disposition_t sctp_sf_do_5_2_3_initack(const struct sctp_endpoint *ep,
  * Section 5.2.4
  *  A)  In this case, the peer may have restarted.
  */
-static sctp_disposition_t sctp_sf_do_dupcook_a(const struct sctp_endpoint *ep,
+static sctp_disposition_t sctp_sf_do_dupcook_a(struct net *net,
+					const struct sctp_endpoint *ep,
 					const struct sctp_association *asoc,
 					struct sctp_chunk *chunk,
 					sctp_cmd_seq_t *commands,
@@ -1713,7 +1728,7 @@ static sctp_disposition_t sctp_sf_do_dupcook_a(const struct sctp_endpoint *ep,
 	 * its peer.
 	*/
 	if (sctp_state(asoc, SHUTDOWN_ACK_SENT)) {
-		disposition = sctp_sf_do_9_2_reshutack(ep, asoc,
+		disposition = sctp_sf_do_9_2_reshutack(net, ep, asoc,
 				SCTP_ST_CHUNK(chunk->chunk_hdr->type),
 				chunk, commands);
 		if (SCTP_DISPOSITION_NOMEM == disposition)
@@ -1776,7 +1791,8 @@ nomem:
  *      after responding to the local endpoint's INIT
  */
 /* This case represents an initialization collision.  */
-static sctp_disposition_t sctp_sf_do_dupcook_b(const struct sctp_endpoint *ep,
+static sctp_disposition_t sctp_sf_do_dupcook_b(struct net *net,
+					const struct sctp_endpoint *ep,
 					const struct sctp_association *asoc,
 					struct sctp_chunk *chunk,
 					sctp_cmd_seq_t *commands,
@@ -1797,7 +1813,7 @@ static sctp_disposition_t sctp_sf_do_dupcook_b(const struct sctp_endpoint *ep,
 	sctp_add_cmd_sf(commands, SCTP_CMD_UPDATE_ASSOC, SCTP_ASOC(new_asoc));
 	sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE,
 			SCTP_STATE(SCTP_STATE_ESTABLISHED));
-	SCTP_INC_STATS(sock_net(new_asoc->base.sk), SCTP_MIB_CURRESTAB);
+	SCTP_INC_STATS(net, SCTP_MIB_CURRESTAB);
 	sctp_add_cmd_sf(commands, SCTP_CMD_HB_TIMERS_START, SCTP_NULL());
 
 	repl = sctp_make_cookie_ack(new_asoc, chunk);
@@ -1846,7 +1862,8 @@ nomem:
  *     but a new tag of its own.
  */
 /* This case represents an initialization collision.  */
-static sctp_disposition_t sctp_sf_do_dupcook_c(const struct sctp_endpoint *ep,
+static sctp_disposition_t sctp_sf_do_dupcook_c(struct net *net,
+					const struct sctp_endpoint *ep,
 					const struct sctp_association *asoc,
 					struct sctp_chunk *chunk,
 					sctp_cmd_seq_t *commands,
@@ -1867,7 +1884,8 @@ static sctp_disposition_t sctp_sf_do_dupcook_c(const struct sctp_endpoint *ep,
  *    enter the ESTABLISHED state, if it has not already done so.
  */
 /* This case represents an initialization collision.  */
-static sctp_disposition_t sctp_sf_do_dupcook_d(const struct sctp_endpoint *ep,
+static sctp_disposition_t sctp_sf_do_dupcook_d(struct net *net,
+					const struct sctp_endpoint *ep,
 					const struct sctp_association *asoc,
 					struct sctp_chunk *chunk,
 					sctp_cmd_seq_t *commands,
@@ -1889,7 +1907,7 @@ static sctp_disposition_t sctp_sf_do_dupcook_d(const struct sctp_endpoint *ep,
 				SCTP_TO(SCTP_EVENT_TIMEOUT_T1_COOKIE));
 		sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE,
 				SCTP_STATE(SCTP_STATE_ESTABLISHED));
-		SCTP_INC_STATS(sock_net(asoc->base.sk), SCTP_MIB_CURRESTAB);
+		SCTP_INC_STATS(net, SCTP_MIB_CURRESTAB);
 		sctp_add_cmd_sf(commands, SCTP_CMD_HB_TIMERS_START,
 				SCTP_NULL());
 
@@ -1961,7 +1979,8 @@ nomem:
  *
  * The return value is the disposition of the chunk.
  */
-sctp_disposition_t sctp_sf_do_5_2_4_dupcook(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_do_5_2_4_dupcook(struct net *net,
+					const struct sctp_endpoint *ep,
 					const struct sctp_association *asoc,
 					const sctp_subtype_t type,
 					void *arg,
@@ -1980,7 +1999,7 @@ sctp_disposition_t sctp_sf_do_5_2_4_dupcook(const struct sctp_endpoint *ep,
 	 * done later.
 	 */
 	if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t)))
-		return sctp_sf_violation_chunklen(ep, asoc, type, arg,
+		return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
 						  commands);
 
 	/* "Decode" the chunk.  We have no optional parameters so we
@@ -2014,12 +2033,12 @@ sctp_disposition_t sctp_sf_do_5_2_4_dupcook(const struct sctp_endpoint *ep,
 			goto nomem;
 
 		case -SCTP_IERROR_STALE_COOKIE:
-			sctp_send_stale_cookie_err(ep, asoc, chunk, commands,
+			sctp_send_stale_cookie_err(net, ep, asoc, chunk, commands,
 						   err_chk_p);
-			return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+			return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
 		case -SCTP_IERROR_BAD_SIG:
 		default:
-			return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+			return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
 		}
 	}
 
@@ -2030,27 +2049,27 @@ sctp_disposition_t sctp_sf_do_5_2_4_dupcook(const struct sctp_endpoint *ep,
 
 	switch (action) {
 	case 'A': /* Association restart. */
-		retval = sctp_sf_do_dupcook_a(ep, asoc, chunk, commands,
+		retval = sctp_sf_do_dupcook_a(net, ep, asoc, chunk, commands,
 					      new_asoc);
 		break;
 
 	case 'B': /* Collision case B. */
-		retval = sctp_sf_do_dupcook_b(ep, asoc, chunk, commands,
+		retval = sctp_sf_do_dupcook_b(net, ep, asoc, chunk, commands,
 					      new_asoc);
 		break;
 
 	case 'C': /* Collision case C. */
-		retval = sctp_sf_do_dupcook_c(ep, asoc, chunk, commands,
+		retval = sctp_sf_do_dupcook_c(net, ep, asoc, chunk, commands,
 					      new_asoc);
 		break;
 
 	case 'D': /* Collision case D. */
-		retval = sctp_sf_do_dupcook_d(ep, asoc, chunk, commands,
+		retval = sctp_sf_do_dupcook_d(net, ep, asoc, chunk, commands,
 					      new_asoc);
 		break;
 
 	default: /* Discard packet for all others. */
-		retval = sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+		retval = sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
 		break;
 	}
 
@@ -2076,6 +2095,7 @@ nomem:
  * See sctp_sf_do_9_1_abort().
  */
 sctp_disposition_t sctp_sf_shutdown_pending_abort(
+	struct net *net,
 	const struct sctp_endpoint *ep,
 	const struct sctp_association *asoc,
 	const sctp_subtype_t type,
@@ -2085,7 +2105,7 @@ sctp_disposition_t sctp_sf_shutdown_pending_abort(
 	struct sctp_chunk *chunk = arg;
 
 	if (!sctp_vtag_verify_either(chunk, asoc))
-		return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+		return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
 
 	/* Make sure that the ABORT chunk has a valid length.
 	 * Since this is an ABORT chunk, we have to discard it
@@ -2098,7 +2118,7 @@ sctp_disposition_t sctp_sf_shutdown_pending_abort(
 	 * packet.
 	 */
 	if (!sctp_chunk_length_valid(chunk, sizeof(sctp_abort_chunk_t)))
-		return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+		return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
 
 	/* ADD-IP: Special case for ABORT chunks
 	 * F4)  One special consideration is that ABORT Chunks arriving
@@ -2107,9 +2127,9 @@ sctp_disposition_t sctp_sf_shutdown_pending_abort(
 	 */
 	if (SCTP_ADDR_DEL ==
 		    sctp_bind_addr_state(&asoc->base.bind_addr, &chunk->dest))
-		return sctp_sf_discard_chunk(ep, asoc, type, arg, commands);
+		return sctp_sf_discard_chunk(net, ep, asoc, type, arg, commands);
 
-	return __sctp_sf_do_9_1_abort(ep, asoc, type, arg, commands);
+	return __sctp_sf_do_9_1_abort(net, ep, asoc, type, arg, commands);
 }
 
 /*
@@ -2117,7 +2137,8 @@ sctp_disposition_t sctp_sf_shutdown_pending_abort(
  *
  * See sctp_sf_do_9_1_abort().
  */
-sctp_disposition_t sctp_sf_shutdown_sent_abort(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_shutdown_sent_abort(struct net *net,
+					const struct sctp_endpoint *ep,
 					const struct sctp_association *asoc,
 					const sctp_subtype_t type,
 					void *arg,
@@ -2126,7 +2147,7 @@ sctp_disposition_t sctp_sf_shutdown_sent_abort(const struct sctp_endpoint *ep,
 	struct sctp_chunk *chunk = arg;
 
 	if (!sctp_vtag_verify_either(chunk, asoc))
-		return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+		return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
 
 	/* Make sure that the ABORT chunk has a valid length.
 	 * Since this is an ABORT chunk, we have to discard it
@@ -2139,7 +2160,7 @@ sctp_disposition_t sctp_sf_shutdown_sent_abort(const struct sctp_endpoint *ep,
 	 * packet.
 	 */
 	if (!sctp_chunk_length_valid(chunk, sizeof(sctp_abort_chunk_t)))
-		return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+		return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
 
 	/* ADD-IP: Special case for ABORT chunks
 	 * F4)  One special consideration is that ABORT Chunks arriving
@@ -2148,7 +2169,7 @@ sctp_disposition_t sctp_sf_shutdown_sent_abort(const struct sctp_endpoint *ep,
 	 */
 	if (SCTP_ADDR_DEL ==
 		    sctp_bind_addr_state(&asoc->base.bind_addr, &chunk->dest))
-		return sctp_sf_discard_chunk(ep, asoc, type, arg, commands);
+		return sctp_sf_discard_chunk(net, ep, asoc, type, arg, commands);
 
 	/* Stop the T2-shutdown timer. */
 	sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP,
@@ -2158,7 +2179,7 @@ sctp_disposition_t sctp_sf_shutdown_sent_abort(const struct sctp_endpoint *ep,
 	sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP,
 			SCTP_TO(SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD));
 
-	return __sctp_sf_do_9_1_abort(ep, asoc, type, arg, commands);
+	return __sctp_sf_do_9_1_abort(net, ep, asoc, type, arg, commands);
 }
 
 /*
@@ -2167,6 +2188,7 @@ sctp_disposition_t sctp_sf_shutdown_sent_abort(const struct sctp_endpoint *ep,
  * See sctp_sf_do_9_1_abort().
  */
 sctp_disposition_t sctp_sf_shutdown_ack_sent_abort(
+	struct net *net,
 	const struct sctp_endpoint *ep,
 	const struct sctp_association *asoc,
 	const sctp_subtype_t type,
@@ -2176,7 +2198,7 @@ sctp_disposition_t sctp_sf_shutdown_ack_sent_abort(
 	/* The same T2 timer, so we should be able to use
 	 * common function with the SHUTDOWN-SENT state.
 	 */
-	return sctp_sf_shutdown_sent_abort(ep, asoc, type, arg, commands);
+	return sctp_sf_shutdown_sent_abort(net, ep, asoc, type, arg, commands);
 }
 
 /*
@@ -2193,7 +2215,8 @@ sctp_disposition_t sctp_sf_shutdown_ack_sent_abort(
  *
  * The return value is the disposition of the chunk.
  */
-sctp_disposition_t sctp_sf_cookie_echoed_err(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_cookie_echoed_err(struct net *net,
+					const struct sctp_endpoint *ep,
 					const struct sctp_association *asoc,
 					const sctp_subtype_t type,
 					void *arg,
@@ -2203,13 +2226,13 @@ sctp_disposition_t sctp_sf_cookie_echoed_err(const struct sctp_endpoint *ep,
 	sctp_errhdr_t *err;
 
 	if (!sctp_vtag_verify(chunk, asoc))
-		return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+		return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
 
 	/* Make sure that the ERROR chunk has a valid length.
 	 * The parameter walking depends on this as well.
 	 */
 	if (!sctp_chunk_length_valid(chunk, sizeof(sctp_operr_chunk_t)))
-		return sctp_sf_violation_chunklen(ep, asoc, type, arg,
+		return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
 						  commands);
 
 	/* Process the error here */
@@ -2219,7 +2242,7 @@ sctp_disposition_t sctp_sf_cookie_echoed_err(const struct sctp_endpoint *ep,
 	 */
 	sctp_walk_errors(err, chunk->chunk_hdr) {
 		if (SCTP_ERROR_STALE_COOKIE == err->cause)
-			return sctp_sf_do_5_2_6_stale(ep, asoc, type,
+			return sctp_sf_do_5_2_6_stale(net, ep, asoc, type,
 							arg, commands);
 	}
 
@@ -2228,7 +2251,7 @@ sctp_disposition_t sctp_sf_cookie_echoed_err(const struct sctp_endpoint *ep,
 	 * we are discarding the packet, there should be no adverse
 	 * affects.
 	 */
-	return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+	return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
 }
 
 /*
@@ -2256,7 +2279,8 @@ sctp_disposition_t sctp_sf_cookie_echoed_err(const struct sctp_endpoint *ep,
  *
  * The return value is the disposition of the chunk.
  */
-static sctp_disposition_t sctp_sf_do_5_2_6_stale(const struct sctp_endpoint *ep,
+static sctp_disposition_t sctp_sf_do_5_2_6_stale(struct net *net,
+						 const struct sctp_endpoint *ep,
 						 const struct sctp_association *asoc,
 						 const sctp_subtype_t type,
 						 void *arg,
@@ -2378,7 +2402,8 @@ nomem:
  *
  * The return value is the disposition of the chunk.
  */
-sctp_disposition_t sctp_sf_do_9_1_abort(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_do_9_1_abort(struct net *net,
+					const struct sctp_endpoint *ep,
 					const struct sctp_association *asoc,
 					const sctp_subtype_t type,
 					void *arg,
@@ -2387,7 +2412,7 @@ sctp_disposition_t sctp_sf_do_9_1_abort(const struct sctp_endpoint *ep,
 	struct sctp_chunk *chunk = arg;
 
 	if (!sctp_vtag_verify_either(chunk, asoc))
-		return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+		return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
 
 	/* Make sure that the ABORT chunk has a valid length.
 	 * Since this is an ABORT chunk, we have to discard it
@@ -2400,7 +2425,7 @@ sctp_disposition_t sctp_sf_do_9_1_abort(const struct sctp_endpoint *ep,
 	 * packet.
 	 */
 	if (!sctp_chunk_length_valid(chunk, sizeof(sctp_abort_chunk_t)))
-		return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+		return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
 
 	/* ADD-IP: Special case for ABORT chunks
 	 * F4)  One special consideration is that ABORT Chunks arriving
@@ -2409,12 +2434,13 @@ sctp_disposition_t sctp_sf_do_9_1_abort(const struct sctp_endpoint *ep,
 	 */
 	if (SCTP_ADDR_DEL ==
 		    sctp_bind_addr_state(&asoc->base.bind_addr, &chunk->dest))
-		return sctp_sf_discard_chunk(ep, asoc, type, arg, commands);
+		return sctp_sf_discard_chunk(net, ep, asoc, type, arg, commands);
 
-	return __sctp_sf_do_9_1_abort(ep, asoc, type, arg, commands);
+	return __sctp_sf_do_9_1_abort(net, ep, asoc, type, arg, commands);
 }
 
-static sctp_disposition_t __sctp_sf_do_9_1_abort(const struct sctp_endpoint *ep,
+static sctp_disposition_t __sctp_sf_do_9_1_abort(struct net *net,
+					const struct sctp_endpoint *ep,
 					const struct sctp_association *asoc,
 					const sctp_subtype_t type,
 					void *arg,
@@ -2423,7 +2449,6 @@ static sctp_disposition_t __sctp_sf_do_9_1_abort(const struct sctp_endpoint *ep,
 	struct sctp_chunk *chunk = arg;
 	unsigned int len;
 	__be16 error = SCTP_ERROR_NO_ERROR;
-	struct net *net;
 
 	/* See if we have an error cause code in the chunk.  */
 	len = ntohs(chunk->chunk_hdr->length);
@@ -2432,7 +2457,7 @@ static sctp_disposition_t __sctp_sf_do_9_1_abort(const struct sctp_endpoint *ep,
 		sctp_errhdr_t *err;
 		sctp_walk_errors(err, chunk->chunk_hdr);
 		if ((void *)err != (void *)chunk->chunk_end)
-			return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+			return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
 
 		error = ((sctp_errhdr_t *)chunk->skb->data)->cause;
 	}
@@ -2440,7 +2465,6 @@ static sctp_disposition_t __sctp_sf_do_9_1_abort(const struct sctp_endpoint *ep,
 	sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR, SCTP_ERROR(ECONNRESET));
 	/* ASSOC_FAILED will DELETE_TCB. */
 	sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED, SCTP_PERR(error));
-	net = sock_net(ep->base.sk);
 	SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS);
 	SCTP_DEC_STATS(net, SCTP_MIB_CURRESTAB);
 
@@ -2452,7 +2476,8 @@ static sctp_disposition_t __sctp_sf_do_9_1_abort(const struct sctp_endpoint *ep,
  *
  * See sctp_sf_do_9_1_abort() above.
  */
-sctp_disposition_t sctp_sf_cookie_wait_abort(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_cookie_wait_abort(struct net *net,
+				     const struct sctp_endpoint *ep,
 				     const struct sctp_association *asoc,
 				     const sctp_subtype_t type,
 				     void *arg,
@@ -2463,7 +2488,7 @@ sctp_disposition_t sctp_sf_cookie_wait_abort(const struct sctp_endpoint *ep,
 	__be16 error = SCTP_ERROR_NO_ERROR;
 
 	if (!sctp_vtag_verify_either(chunk, asoc))
-		return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+		return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
 
 	/* Make sure that the ABORT chunk has a valid length.
 	 * Since this is an ABORT chunk, we have to discard it
@@ -2476,27 +2501,28 @@ sctp_disposition_t sctp_sf_cookie_wait_abort(const struct sctp_endpoint *ep,
 	 * packet.
 	 */
 	if (!sctp_chunk_length_valid(chunk, sizeof(sctp_abort_chunk_t)))
-		return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+		return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
 
 	/* See if we have an error cause code in the chunk.  */
 	len = ntohs(chunk->chunk_hdr->length);
 	if (len >= sizeof(struct sctp_chunkhdr) + sizeof(struct sctp_errhdr))
 		error = ((sctp_errhdr_t *)chunk->skb->data)->cause;
 
-	return sctp_stop_t1_and_abort(commands, error, ECONNREFUSED, asoc,
+	return sctp_stop_t1_and_abort(net, commands, error, ECONNREFUSED, asoc,
 				      chunk->transport);
 }
 
 /*
  * Process an incoming ICMP as an ABORT.  (COOKIE-WAIT state)
  */
-sctp_disposition_t sctp_sf_cookie_wait_icmp_abort(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_cookie_wait_icmp_abort(struct net *net,
+					const struct sctp_endpoint *ep,
 					const struct sctp_association *asoc,
 					const sctp_subtype_t type,
 					void *arg,
 					sctp_cmd_seq_t *commands)
 {
-	return sctp_stop_t1_and_abort(commands, SCTP_ERROR_NO_ERROR,
+	return sctp_stop_t1_and_abort(net, commands, SCTP_ERROR_NO_ERROR,
 				      ENOPROTOOPT, asoc,
 				      (struct sctp_transport *)arg);
 }
@@ -2504,7 +2530,8 @@ sctp_disposition_t sctp_sf_cookie_wait_icmp_abort(const struct sctp_endpoint *ep
 /*
  * Process an ABORT.  (COOKIE-ECHOED state)
  */
-sctp_disposition_t sctp_sf_cookie_echoed_abort(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_cookie_echoed_abort(struct net *net,
+					       const struct sctp_endpoint *ep,
 					       const struct sctp_association *asoc,
 					       const sctp_subtype_t type,
 					       void *arg,
@@ -2513,7 +2540,7 @@ sctp_disposition_t sctp_sf_cookie_echoed_abort(const struct sctp_endpoint *ep,
 	/* There is a single T1 timer, so we should be able to use
 	 * common function with the COOKIE-WAIT state.
 	 */
-	return sctp_sf_cookie_wait_abort(ep, asoc, type, arg, commands);
+	return sctp_sf_cookie_wait_abort(net, ep, asoc, type, arg, commands);
 }
 
 /*
@@ -2521,7 +2548,8 @@ sctp_disposition_t sctp_sf_cookie_echoed_abort(const struct sctp_endpoint *ep,
  *
  * This is common code called by several sctp_sf_*_abort() functions above.
  */
-static sctp_disposition_t sctp_stop_t1_and_abort(sctp_cmd_seq_t *commands,
+static sctp_disposition_t sctp_stop_t1_and_abort(struct net *net,
+					   sctp_cmd_seq_t *commands,
 					   __be16 error, int sk_err,
 					   const struct sctp_association *asoc,
 					   struct sctp_transport *transport)
@@ -2529,7 +2557,7 @@ static sctp_disposition_t sctp_stop_t1_and_abort(sctp_cmd_seq_t *commands,
 	SCTP_DEBUG_PRINTK("ABORT received (INIT).\n");
 	sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE,
 			SCTP_STATE(SCTP_STATE_CLOSED));
-	SCTP_INC_STATS(sock_net(asoc->base.sk), SCTP_MIB_ABORTEDS);
+	SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS);
 	sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP,
 			SCTP_TO(SCTP_EVENT_TIMEOUT_T1_INIT));
 	sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR, SCTP_ERROR(sk_err));
@@ -2572,7 +2600,8 @@ static sctp_disposition_t sctp_stop_t1_and_abort(sctp_cmd_seq_t *commands,
  *
  * The return value is the disposition of the chunk.
  */
-sctp_disposition_t sctp_sf_do_9_2_shutdown(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_do_9_2_shutdown(struct net *net,
+					   const struct sctp_endpoint *ep,
 					   const struct sctp_association *asoc,
 					   const sctp_subtype_t type,
 					   void *arg,
@@ -2585,12 +2614,12 @@ sctp_disposition_t sctp_sf_do_9_2_shutdown(const struct sctp_endpoint *ep,
 	__u32 ctsn;
 
 	if (!sctp_vtag_verify(chunk, asoc))
-		return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+		return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
 
 	/* Make sure that the SHUTDOWN chunk has a valid length. */
 	if (!sctp_chunk_length_valid(chunk,
 				      sizeof(struct sctp_shutdown_chunk_t)))
-		return sctp_sf_violation_chunklen(ep, asoc, type, arg,
+		return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
 						  commands);
 
 	/* Convert the elaborate header.  */
@@ -2610,7 +2639,7 @@ sctp_disposition_t sctp_sf_do_9_2_shutdown(const struct sctp_endpoint *ep,
 	 * sender with an ABORT.
 	 */
 	if (!TSN_lt(ctsn, asoc->next_tsn))
-		return sctp_sf_violation_ctsn(ep, asoc, type, arg, commands);
+		return sctp_sf_violation_ctsn(net, ep, asoc, type, arg, commands);
 
 	/* API 5.3.1.5 SCTP_SHUTDOWN_EVENT
 	 * When a peer sends a SHUTDOWN, SCTP delivers this notification to
@@ -2634,7 +2663,7 @@ sctp_disposition_t sctp_sf_do_9_2_shutdown(const struct sctp_endpoint *ep,
 	disposition = SCTP_DISPOSITION_CONSUME;
 
 	if (sctp_outq_is_empty(&asoc->outqueue)) {
-		disposition = sctp_sf_do_9_2_shutdown_ack(ep, asoc, type,
+		disposition = sctp_sf_do_9_2_shutdown_ack(net, ep, asoc, type,
 							  arg, commands);
 	}
 
@@ -2660,7 +2689,8 @@ out:
  * The Cumulative TSN Ack of the received SHUTDOWN chunk
  * MUST be processed.
  */
-sctp_disposition_t sctp_sf_do_9_2_shut_ctsn(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_do_9_2_shut_ctsn(struct net *net,
+					   const struct sctp_endpoint *ep,
 					   const struct sctp_association *asoc,
 					   const sctp_subtype_t type,
 					   void *arg,
@@ -2671,12 +2701,12 @@ sctp_disposition_t sctp_sf_do_9_2_shut_ctsn(const struct sctp_endpoint *ep,
 	__u32 ctsn;
 
 	if (!sctp_vtag_verify(chunk, asoc))
-		return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+		return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
 
 	/* Make sure that the SHUTDOWN chunk has a valid length. */
 	if (!sctp_chunk_length_valid(chunk,
 				      sizeof(struct sctp_shutdown_chunk_t)))
-		return sctp_sf_violation_chunklen(ep, asoc, type, arg,
+		return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
 						  commands);
 
 	sdh = (sctp_shutdownhdr_t *)chunk->skb->data;
@@ -2693,7 +2723,7 @@ sctp_disposition_t sctp_sf_do_9_2_shut_ctsn(const struct sctp_endpoint *ep,
 	 * sender with an ABORT.
 	 */
 	if (!TSN_lt(ctsn, asoc->next_tsn))
-		return sctp_sf_violation_ctsn(ep, asoc, type, arg, commands);
+		return sctp_sf_violation_ctsn(net, ep, asoc, type, arg, commands);
 
 	/* verify, by checking the Cumulative TSN Ack field of the
 	 * chunk, that all its outstanding DATA chunks have been
@@ -2712,7 +2742,8 @@ sctp_disposition_t sctp_sf_do_9_2_shut_ctsn(const struct sctp_endpoint *ep,
  * that belong to this association, it should discard the INIT chunk and
  * retransmit the SHUTDOWN ACK chunk.
  */
-sctp_disposition_t sctp_sf_do_9_2_reshutack(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_do_9_2_reshutack(struct net *net,
+				    const struct sctp_endpoint *ep,
 				    const struct sctp_association *asoc,
 				    const sctp_subtype_t type,
 				    void *arg,
@@ -2723,7 +2754,7 @@ sctp_disposition_t sctp_sf_do_9_2_reshutack(const struct sctp_endpoint *ep,
 
 	/* Make sure that the chunk has a valid length */
 	if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t)))
-		return sctp_sf_violation_chunklen(ep, asoc, type, arg,
+		return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
 						  commands);
 
 	/* Since we are not going to really process this INIT, there
@@ -2775,7 +2806,8 @@ nomem:
  *
  * The return value is the disposition of the chunk.
  */
-sctp_disposition_t sctp_sf_do_ecn_cwr(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_do_ecn_cwr(struct net *net,
+				      const struct sctp_endpoint *ep,
 				      const struct sctp_association *asoc,
 				      const sctp_subtype_t type,
 				      void *arg,
@@ -2786,10 +2818,10 @@ sctp_disposition_t sctp_sf_do_ecn_cwr(const struct sctp_endpoint *ep,
 	u32 lowest_tsn;
 
 	if (!sctp_vtag_verify(chunk, asoc))
-		return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+		return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
 
 	if (!sctp_chunk_length_valid(chunk, sizeof(sctp_ecne_chunk_t)))
-		return sctp_sf_violation_chunklen(ep, asoc, type, arg,
+		return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
 						  commands);
 
 	cwr = (sctp_cwrhdr_t *) chunk->skb->data;
@@ -2830,7 +2862,8 @@ sctp_disposition_t sctp_sf_do_ecn_cwr(const struct sctp_endpoint *ep,
  *
  * The return value is the disposition of the chunk.
  */
-sctp_disposition_t sctp_sf_do_ecne(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_do_ecne(struct net *net,
+				   const struct sctp_endpoint *ep,
 				   const struct sctp_association *asoc,
 				   const sctp_subtype_t type,
 				   void *arg,
@@ -2840,10 +2873,10 @@ sctp_disposition_t sctp_sf_do_ecne(const struct sctp_endpoint *ep,
 	struct sctp_chunk *chunk = arg;
 
 	if (!sctp_vtag_verify(chunk, asoc))
-		return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+		return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
 
 	if (!sctp_chunk_length_valid(chunk, sizeof(sctp_ecne_chunk_t)))
-		return sctp_sf_violation_chunklen(ep, asoc, type, arg,
+		return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
 						  commands);
 
 	ecne = (sctp_ecnehdr_t *) chunk->skb->data;
@@ -2886,7 +2919,8 @@ sctp_disposition_t sctp_sf_do_ecne(const struct sctp_endpoint *ep,
  *
  * The return value is the disposition of the chunk.
  */
-sctp_disposition_t sctp_sf_eat_data_6_2(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_eat_data_6_2(struct net *net,
+					const struct sctp_endpoint *ep,
 					const struct sctp_association *asoc,
 					const sctp_subtype_t type,
 					void *arg,
@@ -2899,11 +2933,11 @@ sctp_disposition_t sctp_sf_eat_data_6_2(const struct sctp_endpoint *ep,
 	if (!sctp_vtag_verify(chunk, asoc)) {
 		sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_BAD_TAG,
 				SCTP_NULL());
-		return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+		return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
 	}
 
 	if (!sctp_chunk_length_valid(chunk, sizeof(sctp_data_chunk_t)))
-		return sctp_sf_violation_chunklen(ep, asoc, type, arg,
+		return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
 						  commands);
 
 	error = sctp_eat_data(asoc, chunk, commands );
@@ -2912,16 +2946,16 @@ sctp_disposition_t sctp_sf_eat_data_6_2(const struct sctp_endpoint *ep,
 		break;
 	case SCTP_IERROR_HIGH_TSN:
 	case SCTP_IERROR_BAD_STREAM:
-		SCTP_INC_STATS(sock_net(asoc->base.sk), SCTP_MIB_IN_DATA_CHUNK_DISCARDS);
+		SCTP_INC_STATS(net, SCTP_MIB_IN_DATA_CHUNK_DISCARDS);
 		goto discard_noforce;
 	case SCTP_IERROR_DUP_TSN:
 	case SCTP_IERROR_IGNORE_TSN:
-		SCTP_INC_STATS(sock_net(asoc->base.sk), SCTP_MIB_IN_DATA_CHUNK_DISCARDS);
+		SCTP_INC_STATS(net, SCTP_MIB_IN_DATA_CHUNK_DISCARDS);
 		goto discard_force;
 	case SCTP_IERROR_NO_DATA:
 		goto consume;
 	case SCTP_IERROR_PROTO_VIOLATION:
-		return sctp_sf_abort_violation(ep, asoc, chunk, commands,
+		return sctp_sf_abort_violation(net, ep, asoc, chunk, commands,
 			(u8 *)chunk->subh.data_hdr, sizeof(sctp_datahdr_t));
 	default:
 		BUG();
@@ -3007,7 +3041,8 @@ consume:
  *
  * The return value is the disposition of the chunk.
  */
-sctp_disposition_t sctp_sf_eat_data_fast_4_4(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_eat_data_fast_4_4(struct net *net,
+				     const struct sctp_endpoint *ep,
 				     const struct sctp_association *asoc,
 				     const sctp_subtype_t type,
 				     void *arg,
@@ -3019,11 +3054,11 @@ sctp_disposition_t sctp_sf_eat_data_fast_4_4(const struct sctp_endpoint *ep,
 	if (!sctp_vtag_verify(chunk, asoc)) {
 		sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_BAD_TAG,
 				SCTP_NULL());
-		return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+		return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
 	}
 
 	if (!sctp_chunk_length_valid(chunk, sizeof(sctp_data_chunk_t)))
-		return sctp_sf_violation_chunklen(ep, asoc, type, arg,
+		return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
 						  commands);
 
 	error = sctp_eat_data(asoc, chunk, commands );
@@ -3037,7 +3072,7 @@ sctp_disposition_t sctp_sf_eat_data_fast_4_4(const struct sctp_endpoint *ep,
 	case SCTP_IERROR_NO_DATA:
 		goto consume;
 	case SCTP_IERROR_PROTO_VIOLATION:
-		return sctp_sf_abort_violation(ep, asoc, chunk, commands,
+		return sctp_sf_abort_violation(net, ep, asoc, chunk, commands,
 			(u8 *)chunk->subh.data_hdr, sizeof(sctp_datahdr_t));
 	default:
 		BUG();
@@ -3097,7 +3132,8 @@ consume:
  *
  * The return value is the disposition of the chunk.
  */
-sctp_disposition_t sctp_sf_eat_sack_6_2(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_eat_sack_6_2(struct net *net,
+					const struct sctp_endpoint *ep,
 					const struct sctp_association *asoc,
 					const sctp_subtype_t type,
 					void *arg,
@@ -3108,18 +3144,18 @@ sctp_disposition_t sctp_sf_eat_sack_6_2(const struct sctp_endpoint *ep,
 	__u32 ctsn;
 
 	if (!sctp_vtag_verify(chunk, asoc))
-		return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+		return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
 
 	/* Make sure that the SACK chunk has a valid length. */
 	if (!sctp_chunk_length_valid(chunk, sizeof(sctp_sack_chunk_t)))
-		return sctp_sf_violation_chunklen(ep, asoc, type, arg,
+		return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
 						  commands);
 
 	/* Pull the SACK chunk from the data buffer */
 	sackh = sctp_sm_pull_sack(chunk);
 	/* Was this a bogus SACK? */
 	if (!sackh)
-		return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+		return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
 	chunk->subh.sack_hdr = sackh;
 	ctsn = ntohl(sackh->cum_tsn_ack);
 
@@ -3140,7 +3176,7 @@ sctp_disposition_t sctp_sf_eat_sack_6_2(const struct sctp_endpoint *ep,
 	 * sender with an ABORT.
 	 */
 	if (!TSN_lt(ctsn, asoc->next_tsn))
-		return sctp_sf_violation_ctsn(ep, asoc, type, arg, commands);
+		return sctp_sf_violation_ctsn(net, ep, asoc, type, arg, commands);
 
 	/* Return this SACK for further processing.  */
 	sctp_add_cmd_sf(commands, SCTP_CMD_PROCESS_SACK, SCTP_SACKH(sackh));
@@ -3169,7 +3205,8 @@ sctp_disposition_t sctp_sf_eat_sack_6_2(const struct sctp_endpoint *ep,
  *
  * The return value is the disposition of the chunk.
 */
-static sctp_disposition_t sctp_sf_tabort_8_4_8(const struct sctp_endpoint *ep,
+static sctp_disposition_t sctp_sf_tabort_8_4_8(struct net *net,
+					const struct sctp_endpoint *ep,
 					const struct sctp_association *asoc,
 					const sctp_subtype_t type,
 					void *arg,
@@ -3178,9 +3215,7 @@ static sctp_disposition_t sctp_sf_tabort_8_4_8(const struct sctp_endpoint *ep,
 	struct sctp_packet *packet = NULL;
 	struct sctp_chunk *chunk = arg;
 	struct sctp_chunk *abort;
-	struct net *net;
 
-	net = sock_net(ep->base.sk);
 	packet = sctp_ootb_pkt_new(net, asoc, chunk);
 
 	if (packet) {
@@ -3207,7 +3242,7 @@ static sctp_disposition_t sctp_sf_tabort_8_4_8(const struct sctp_endpoint *ep,
 
 		SCTP_INC_STATS(net, SCTP_MIB_OUTCTRLCHUNKS);
 
-		sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+		sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
 		return SCTP_DISPOSITION_CONSUME;
 	}
 
@@ -3222,7 +3257,8 @@ static sctp_disposition_t sctp_sf_tabort_8_4_8(const struct sctp_endpoint *ep,
  *
  * The return value is the disposition of the chunk.
 */
-sctp_disposition_t sctp_sf_operr_notify(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_operr_notify(struct net *net,
+					const struct sctp_endpoint *ep,
 					const struct sctp_association *asoc,
 					const sctp_subtype_t type,
 					void *arg,
@@ -3232,15 +3268,15 @@ sctp_disposition_t sctp_sf_operr_notify(const struct sctp_endpoint *ep,
 	sctp_errhdr_t *err;
 
 	if (!sctp_vtag_verify(chunk, asoc))
-		return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+		return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
 
 	/* Make sure that the ERROR chunk has a valid length. */
 	if (!sctp_chunk_length_valid(chunk, sizeof(sctp_operr_chunk_t)))
-		return sctp_sf_violation_chunklen(ep, asoc, type, arg,
+		return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
 						  commands);
 	sctp_walk_errors(err, chunk->chunk_hdr);
 	if ((void *)err != (void *)chunk->chunk_end)
-		return sctp_sf_violation_paramlen(ep, asoc, type, arg,
+		return sctp_sf_violation_paramlen(net, ep, asoc, type, arg,
 						  (void *)err, commands);
 
 	sctp_add_cmd_sf(commands, SCTP_CMD_PROCESS_OPERR,
@@ -3259,7 +3295,8 @@ sctp_disposition_t sctp_sf_operr_notify(const struct sctp_endpoint *ep,
  *
  * The return value is the disposition.
  */
-sctp_disposition_t sctp_sf_do_9_2_final(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_do_9_2_final(struct net *net,
+					const struct sctp_endpoint *ep,
 					const struct sctp_association *asoc,
 					const sctp_subtype_t type,
 					void *arg,
@@ -3268,14 +3305,13 @@ sctp_disposition_t sctp_sf_do_9_2_final(const struct sctp_endpoint *ep,
 	struct sctp_chunk *chunk = arg;
 	struct sctp_chunk *reply;
 	struct sctp_ulpevent *ev;
-	struct net *net;
 
 	if (!sctp_vtag_verify(chunk, asoc))
-		return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+		return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
 
 	/* Make sure that the SHUTDOWN_ACK chunk has a valid length. */
 	if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t)))
-		return sctp_sf_violation_chunklen(ep, asoc, type, arg,
+		return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
 						  commands);
 	/* 10.2 H) SHUTDOWN COMPLETE notification
 	 *
@@ -3308,7 +3344,6 @@ sctp_disposition_t sctp_sf_do_9_2_final(const struct sctp_endpoint *ep,
 
 	sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE,
 			SCTP_STATE(SCTP_STATE_CLOSED));
-	net = sock_net(asoc->base.sk);
 	SCTP_INC_STATS(net, SCTP_MIB_SHUTDOWNS);
 	SCTP_DEC_STATS(net, SCTP_MIB_CURRESTAB);
 	sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(reply));
@@ -3343,7 +3378,8 @@ nomem:
  *    receiver of the OOTB packet shall discard the OOTB packet and take
  *    no further action.
  */
-sctp_disposition_t sctp_sf_ootb(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_ootb(struct net *net,
+				const struct sctp_endpoint *ep,
 				const struct sctp_association *asoc,
 				const sctp_subtype_t type,
 				void *arg,
@@ -3356,16 +3392,14 @@ sctp_disposition_t sctp_sf_ootb(const struct sctp_endpoint *ep,
 	__u8 *ch_end;
 	int ootb_shut_ack = 0;
 	int ootb_cookie_ack = 0;
-	struct net *net;
 
-	net = sock_net(asoc->base.sk);
 	SCTP_INC_STATS(net, SCTP_MIB_OUTOFBLUES);
 
 	ch = (sctp_chunkhdr_t *) chunk->chunk_hdr;
 	do {
 		/* Report violation if the chunk is less then minimal */
 		if (ntohs(ch->length) < sizeof(sctp_chunkhdr_t))
-			return sctp_sf_violation_chunklen(ep, asoc, type, arg,
+			return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
 						  commands);
 
 		/* Now that we know we at least have a chunk header,
@@ -3380,7 +3414,7 @@ sctp_disposition_t sctp_sf_ootb(const struct sctp_endpoint *ep,
 		 *   sending an ABORT of its own.
 		 */
 		if (SCTP_CID_ABORT == ch->type)
-			return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+			return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
 
 		/* RFC 8.4, 7) If the packet contains a "Stale cookie" ERROR
 		 * or a COOKIE ACK the SCTP Packet should be silently
@@ -3402,18 +3436,18 @@ sctp_disposition_t sctp_sf_ootb(const struct sctp_endpoint *ep,
 		/* Report violation if chunk len overflows */
 		ch_end = ((__u8 *)ch) + WORD_ROUND(ntohs(ch->length));
 		if (ch_end > skb_tail_pointer(skb))
-			return sctp_sf_violation_chunklen(ep, asoc, type, arg,
+			return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
 						  commands);
 
 		ch = (sctp_chunkhdr_t *) ch_end;
 	} while (ch_end < skb_tail_pointer(skb));
 
 	if (ootb_shut_ack)
-		return sctp_sf_shut_8_4_5(ep, asoc, type, arg, commands);
+		return sctp_sf_shut_8_4_5(net, ep, asoc, type, arg, commands);
 	else if (ootb_cookie_ack)
-		return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+		return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
 	else
-		return sctp_sf_tabort_8_4_8(ep, asoc, type, arg, commands);
+		return sctp_sf_tabort_8_4_8(net, ep, asoc, type, arg, commands);
 }
 
 /*
@@ -3437,7 +3471,8 @@ sctp_disposition_t sctp_sf_ootb(const struct sctp_endpoint *ep,
  *
  * The return value is the disposition of the chunk.
  */
-static sctp_disposition_t sctp_sf_shut_8_4_5(const struct sctp_endpoint *ep,
+static sctp_disposition_t sctp_sf_shut_8_4_5(struct net *net,
+					     const struct sctp_endpoint *ep,
 					     const struct sctp_association *asoc,
 					     const sctp_subtype_t type,
 					     void *arg,
@@ -3446,9 +3481,7 @@ static sctp_disposition_t sctp_sf_shut_8_4_5(const struct sctp_endpoint *ep,
 	struct sctp_packet *packet = NULL;
 	struct sctp_chunk *chunk = arg;
 	struct sctp_chunk *shut;
-	struct net *net;
 
-	net = sock_net(ep->base.sk);
 	packet = sctp_ootb_pkt_new(net, asoc, chunk);
 
 	if (packet) {
@@ -3479,13 +3512,13 @@ static sctp_disposition_t sctp_sf_shut_8_4_5(const struct sctp_endpoint *ep,
 		 * the reset of the packet.
 		 */
 		if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t)))
-			return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+			return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
 
 		/* We need to discard the rest of the packet to prevent
 		 * potential bomming attacks from additional bundled chunks.
 		 * This is documented in SCTP Threats ID.
 		 */
-		return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+		return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
 	}
 
 	return SCTP_DISPOSITION_NOMEM;
@@ -3502,7 +3535,8 @@ static sctp_disposition_t sctp_sf_shut_8_4_5(const struct sctp_endpoint *ep,
  *   chunks. --piggy ]
  *
  */
-sctp_disposition_t sctp_sf_do_8_5_1_E_sa(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_do_8_5_1_E_sa(struct net *net,
+				      const struct sctp_endpoint *ep,
 				      const struct sctp_association *asoc,
 				      const sctp_subtype_t type,
 				      void *arg,
@@ -3512,7 +3546,7 @@ sctp_disposition_t sctp_sf_do_8_5_1_E_sa(const struct sctp_endpoint *ep,
 
 	/* Make sure that the SHUTDOWN_ACK chunk has a valid length. */
 	if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t)))
-		return sctp_sf_violation_chunklen(ep, asoc, type, arg,
+		return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
 						  commands);
 
 	/* Although we do have an association in this case, it corresponds
@@ -3520,13 +3554,14 @@ sctp_disposition_t sctp_sf_do_8_5_1_E_sa(const struct sctp_endpoint *ep,
 	 * packet and the state function that handles OOTB SHUTDOWN_ACK is
 	 * called with a NULL association.
 	 */
-	SCTP_INC_STATS(sock_net(ep->base.sk), SCTP_MIB_OUTOFBLUES);
+	SCTP_INC_STATS(net, SCTP_MIB_OUTOFBLUES);
 
-	return sctp_sf_shut_8_4_5(ep, NULL, type, arg, commands);
+	return sctp_sf_shut_8_4_5(net, ep, NULL, type, arg, commands);
 }
 
 /* ADDIP Section 4.2 Upon reception of an ASCONF Chunk.  */
-sctp_disposition_t sctp_sf_do_asconf(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_do_asconf(struct net *net,
+				     const struct sctp_endpoint *ep,
 				     const struct sctp_association *asoc,
 				     const sctp_subtype_t type, void *arg,
 				     sctp_cmd_seq_t *commands)
@@ -3542,7 +3577,7 @@ sctp_disposition_t sctp_sf_do_asconf(const struct sctp_endpoint *ep,
 	if (!sctp_vtag_verify(chunk, asoc)) {
 		sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_BAD_TAG,
 				SCTP_NULL());
-		return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+		return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
 	}
 
 	/* ADD-IP: Section 4.1.1
@@ -3552,11 +3587,11 @@ sctp_disposition_t sctp_sf_do_asconf(const struct sctp_endpoint *ep,
 	 * described in [I-D.ietf-tsvwg-sctp-auth].
 	 */
 	if (!sctp_addip_noauth && !chunk->auth)
-		return sctp_sf_discard_chunk(ep, asoc, type, arg, commands);
+		return sctp_sf_discard_chunk(net, ep, asoc, type, arg, commands);
 
 	/* Make sure that the ASCONF ADDIP chunk has a valid length.  */
 	if (!sctp_chunk_length_valid(chunk, sizeof(sctp_addip_chunk_t)))
-		return sctp_sf_violation_chunklen(ep, asoc, type, arg,
+		return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
 						  commands);
 
 	hdr = (sctp_addiphdr_t *)chunk->skb->data;
@@ -3565,7 +3600,7 @@ sctp_disposition_t sctp_sf_do_asconf(const struct sctp_endpoint *ep,
 	addr_param = (union sctp_addr_param *)hdr->params;
 	length = ntohs(addr_param->p.length);
 	if (length < sizeof(sctp_paramhdr_t))
-		return sctp_sf_violation_paramlen(ep, asoc, type, arg,
+		return sctp_sf_violation_paramlen(net, ep, asoc, type, arg,
 			   (void *)addr_param, commands);
 
 	/* Verify the ASCONF chunk before processing it. */
@@ -3573,7 +3608,7 @@ sctp_disposition_t sctp_sf_do_asconf(const struct sctp_endpoint *ep,
 			    (sctp_paramhdr_t *)((void *)addr_param + length),
 			    (void *)chunk->chunk_end,
 			    &err_param))
-		return sctp_sf_violation_paramlen(ep, asoc, type, arg,
+		return sctp_sf_violation_paramlen(net, ep, asoc, type, arg,
 						  (void *)err_param, commands);
 
 	/* ADDIP 5.2 E1) Compare the value of the serial number to the value
@@ -3653,7 +3688,8 @@ sctp_disposition_t sctp_sf_do_asconf(const struct sctp_endpoint *ep,
  * When building TLV parameters for the ASCONF Chunk that will add or
  * delete IP addresses the D0 to D13 rules should be applied:
  */
-sctp_disposition_t sctp_sf_do_asconf_ack(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_do_asconf_ack(struct net *net,
+					 const struct sctp_endpoint *ep,
 					 const struct sctp_association *asoc,
 					 const sctp_subtype_t type, void *arg,
 					 sctp_cmd_seq_t *commands)
@@ -3668,7 +3704,7 @@ sctp_disposition_t sctp_sf_do_asconf_ack(const struct sctp_endpoint *ep,
 	if (!sctp_vtag_verify(asconf_ack, asoc)) {
 		sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_BAD_TAG,
 				SCTP_NULL());
-		return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+		return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
 	}
 
 	/* ADD-IP, Section 4.1.2:
@@ -3678,11 +3714,11 @@ sctp_disposition_t sctp_sf_do_asconf_ack(const struct sctp_endpoint *ep,
 	 * described in [I-D.ietf-tsvwg-sctp-auth].
 	 */
 	if (!sctp_addip_noauth && !asconf_ack->auth)
-		return sctp_sf_discard_chunk(ep, asoc, type, arg, commands);
+		return sctp_sf_discard_chunk(net, ep, asoc, type, arg, commands);
 
 	/* Make sure that the ADDIP chunk has a valid length.  */
 	if (!sctp_chunk_length_valid(asconf_ack, sizeof(sctp_addip_chunk_t)))
-		return sctp_sf_violation_chunklen(ep, asoc, type, arg,
+		return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
 						  commands);
 
 	addip_hdr = (sctp_addiphdr_t *)asconf_ack->skb->data;
@@ -3693,7 +3729,7 @@ sctp_disposition_t sctp_sf_do_asconf_ack(const struct sctp_endpoint *ep,
 	    (sctp_paramhdr_t *)addip_hdr->params,
 	    (void *)asconf_ack->chunk_end,
 	    &err_param))
-		return sctp_sf_violation_paramlen(ep, asoc, type, arg,
+		return sctp_sf_violation_paramlen(net, ep, asoc, type, arg,
 			   (void *)err_param, commands);
 
 	if (last_asconf) {
@@ -3711,7 +3747,6 @@ sctp_disposition_t sctp_sf_do_asconf_ack(const struct sctp_endpoint *ep,
 	 */
 	if (ADDIP_SERIAL_gte(rcvd_serial, sent_serial + 1) &&
 	    !(asoc->addip_last_asconf)) {
-		struct net *net;
 		abort = sctp_make_abort(asoc, asconf_ack,
 					sizeof(sctp_errhdr_t));
 		if (abort) {
@@ -3729,14 +3764,12 @@ sctp_disposition_t sctp_sf_do_asconf_ack(const struct sctp_endpoint *ep,
 				SCTP_ERROR(ECONNABORTED));
 		sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED,
 				SCTP_PERR(SCTP_ERROR_ASCONF_ACK));
-		net = sock_net(asoc->base.sk);
 		SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS);
 		SCTP_DEC_STATS(net, SCTP_MIB_CURRESTAB);
 		return SCTP_DISPOSITION_ABORT;
 	}
 
 	if ((rcvd_serial == sent_serial) && asoc->addip_last_asconf) {
-		struct net *net;
 		sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP,
 				SCTP_TO(SCTP_EVENT_TIMEOUT_T4_RTO));
 
@@ -3765,7 +3798,6 @@ sctp_disposition_t sctp_sf_do_asconf_ack(const struct sctp_endpoint *ep,
 				SCTP_ERROR(ECONNABORTED));
 		sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED,
 				SCTP_PERR(SCTP_ERROR_ASCONF_ACK));
-		net = sock_net(asoc->base.sk);
 		SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS);
 		SCTP_DEC_STATS(net, SCTP_MIB_CURRESTAB);
 		return SCTP_DISPOSITION_ABORT;
@@ -3788,7 +3820,8 @@ sctp_disposition_t sctp_sf_do_asconf_ack(const struct sctp_endpoint *ep,
  *
  * The return value is the disposition of the chunk.
  */
-sctp_disposition_t sctp_sf_eat_fwd_tsn(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_eat_fwd_tsn(struct net *net,
+				       const struct sctp_endpoint *ep,
 				       const struct sctp_association *asoc,
 				       const sctp_subtype_t type,
 				       void *arg,
@@ -3803,12 +3836,12 @@ sctp_disposition_t sctp_sf_eat_fwd_tsn(const struct sctp_endpoint *ep,
 	if (!sctp_vtag_verify(chunk, asoc)) {
 		sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_BAD_TAG,
 				SCTP_NULL());
-		return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+		return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
 	}
 
 	/* Make sure that the FORWARD_TSN chunk has valid length.  */
 	if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_fwdtsn_chunk)))
-		return sctp_sf_violation_chunklen(ep, asoc, type, arg,
+		return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
 						  commands);
 
 	fwdtsn_hdr = (struct sctp_fwdtsn_hdr *)chunk->skb->data;
@@ -3855,6 +3888,7 @@ discard_noforce:
 }
 
 sctp_disposition_t sctp_sf_eat_fwd_tsn_fast(
+	struct net *net,
 	const struct sctp_endpoint *ep,
 	const struct sctp_association *asoc,
 	const sctp_subtype_t type,
@@ -3870,12 +3904,12 @@ sctp_disposition_t sctp_sf_eat_fwd_tsn_fast(
 	if (!sctp_vtag_verify(chunk, asoc)) {
 		sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_BAD_TAG,
 				SCTP_NULL());
-		return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+		return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
 	}
 
 	/* Make sure that the FORWARD_TSN chunk has a valid length.  */
 	if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_fwdtsn_chunk)))
-		return sctp_sf_violation_chunklen(ep, asoc, type, arg,
+		return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
 						  commands);
 
 	fwdtsn_hdr = (struct sctp_fwdtsn_hdr *)chunk->skb->data;
@@ -3942,7 +3976,8 @@ gen_shutdown:
  *
  * The return value is the disposition of the chunk.
  */
-static sctp_ierror_t sctp_sf_authenticate(const struct sctp_endpoint *ep,
+static sctp_ierror_t sctp_sf_authenticate(struct net *net,
+				    const struct sctp_endpoint *ep,
 				    const struct sctp_association *asoc,
 				    const sctp_subtype_t type,
 				    struct sctp_chunk *chunk)
@@ -4015,7 +4050,8 @@ nomem:
 	return SCTP_IERROR_NOMEM;
 }
 
-sctp_disposition_t sctp_sf_eat_auth(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_eat_auth(struct net *net,
+				    const struct sctp_endpoint *ep,
 				    const struct sctp_association *asoc,
 				    const sctp_subtype_t type,
 				    void *arg,
@@ -4028,21 +4064,21 @@ sctp_disposition_t sctp_sf_eat_auth(const struct sctp_endpoint *ep,
 
 	/* Make sure that the peer has AUTH capable */
 	if (!asoc->peer.auth_capable)
-		return sctp_sf_unk_chunk(ep, asoc, type, arg, commands);
+		return sctp_sf_unk_chunk(net, ep, asoc, type, arg, commands);
 
 	if (!sctp_vtag_verify(chunk, asoc)) {
 		sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_BAD_TAG,
 				SCTP_NULL());
-		return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+		return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
 	}
 
 	/* Make sure that the AUTH chunk has valid length.  */
 	if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_auth_chunk)))
-		return sctp_sf_violation_chunklen(ep, asoc, type, arg,
+		return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
 						  commands);
 
 	auth_hdr = (struct sctp_authhdr *)chunk->skb->data;
-	error = sctp_sf_authenticate(ep, asoc, type, chunk);
+	error = sctp_sf_authenticate(net, ep, asoc, type, chunk);
 	switch (error) {
 	case SCTP_IERROR_AUTH_BAD_HMAC:
 		/* Generate the ERROR chunk and discard the rest
@@ -4059,10 +4095,10 @@ sctp_disposition_t sctp_sf_eat_auth(const struct sctp_endpoint *ep,
 		/* Fall Through */
 	case SCTP_IERROR_AUTH_BAD_KEYID:
 	case SCTP_IERROR_BAD_SIG:
-		return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+		return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
 
 	case SCTP_IERROR_PROTO_VIOLATION:
-		return sctp_sf_violation_chunklen(ep, asoc, type, arg,
+		return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
 						  commands);
 
 	case SCTP_IERROR_NOMEM:
@@ -4111,7 +4147,8 @@ sctp_disposition_t sctp_sf_eat_auth(const struct sctp_endpoint *ep,
  *
  * The return value is the disposition of the chunk.
  */
-sctp_disposition_t sctp_sf_unk_chunk(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_unk_chunk(struct net *net,
+				     const struct sctp_endpoint *ep,
 				     const struct sctp_association *asoc,
 				     const sctp_subtype_t type,
 				     void *arg,
@@ -4124,20 +4161,20 @@ sctp_disposition_t sctp_sf_unk_chunk(const struct sctp_endpoint *ep,
 	SCTP_DEBUG_PRINTK("Processing the unknown chunk id %d.\n", type.chunk);
 
 	if (!sctp_vtag_verify(unk_chunk, asoc))
-		return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+		return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
 
 	/* Make sure that the chunk has a valid length.
 	 * Since we don't know the chunk type, we use a general
 	 * chunkhdr structure to make a comparison.
 	 */
 	if (!sctp_chunk_length_valid(unk_chunk, sizeof(sctp_chunkhdr_t)))
-		return sctp_sf_violation_chunklen(ep, asoc, type, arg,
+		return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
 						  commands);
 
 	switch (type.chunk & SCTP_CID_ACTION_MASK) {
 	case SCTP_CID_ACTION_DISCARD:
 		/* Discard the packet.  */
-		return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+		return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
 		break;
 	case SCTP_CID_ACTION_DISCARD_ERR:
 		/* Generate an ERROR chunk as response. */
@@ -4152,7 +4189,7 @@ sctp_disposition_t sctp_sf_unk_chunk(const struct sctp_endpoint *ep,
 		}
 
 		/* Discard the packet.  */
-		sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+		sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
 		return SCTP_DISPOSITION_CONSUME;
 		break;
 	case SCTP_CID_ACTION_SKIP:
@@ -4194,7 +4231,8 @@ sctp_disposition_t sctp_sf_unk_chunk(const struct sctp_endpoint *ep,
  *
  * The return value is the disposition of the chunk.
  */
-sctp_disposition_t sctp_sf_discard_chunk(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_discard_chunk(struct net *net,
+					 const struct sctp_endpoint *ep,
 					 const struct sctp_association *asoc,
 					 const sctp_subtype_t type,
 					 void *arg,
@@ -4207,7 +4245,7 @@ sctp_disposition_t sctp_sf_discard_chunk(const struct sctp_endpoint *ep,
 	 * chunkhdr structure to make a comparison.
 	 */
 	if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t)))
-		return sctp_sf_violation_chunklen(ep, asoc, type, arg,
+		return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
 						  commands);
 
 	SCTP_DEBUG_PRINTK("Chunk %d is discarded\n", type.chunk);
@@ -4232,13 +4270,14 @@ sctp_disposition_t sctp_sf_discard_chunk(const struct sctp_endpoint *ep,
  *
  * The return value is the disposition of the chunk.
  */
-sctp_disposition_t sctp_sf_pdiscard(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_pdiscard(struct net *net,
+				    const struct sctp_endpoint *ep,
 				    const struct sctp_association *asoc,
 				    const sctp_subtype_t type,
 				    void *arg,
 				    sctp_cmd_seq_t *commands)
 {
-	SCTP_INC_STATS(sock_net(asoc->base.sk), SCTP_MIB_IN_PKT_DISCARDS);
+	SCTP_INC_STATS(net, SCTP_MIB_IN_PKT_DISCARDS);
 	sctp_add_cmd_sf(commands, SCTP_CMD_DISCARD_PACKET, SCTP_NULL());
 
 	return SCTP_DISPOSITION_CONSUME;
@@ -4259,7 +4298,8 @@ sctp_disposition_t sctp_sf_pdiscard(const struct sctp_endpoint *ep,
  * We simply tag the chunk as a violation.  The state machine will log
  * the violation and continue.
  */
-sctp_disposition_t sctp_sf_violation(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_violation(struct net *net,
+				     const struct sctp_endpoint *ep,
 				     const struct sctp_association *asoc,
 				     const sctp_subtype_t type,
 				     void *arg,
@@ -4269,7 +4309,7 @@ sctp_disposition_t sctp_sf_violation(const struct sctp_endpoint *ep,
 
 	/* Make sure that the chunk has a valid length. */
 	if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t)))
-		return sctp_sf_violation_chunklen(ep, asoc, type, arg,
+		return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
 						  commands);
 
 	return SCTP_DISPOSITION_VIOLATION;
@@ -4279,6 +4319,7 @@ sctp_disposition_t sctp_sf_violation(const struct sctp_endpoint *ep,
  * Common function to handle a protocol violation.
  */
 static sctp_disposition_t sctp_sf_abort_violation(
+				     struct net *net,
 				     const struct sctp_endpoint *ep,
 				     const struct sctp_association *asoc,
 				     void *arg,
@@ -4289,7 +4330,6 @@ static sctp_disposition_t sctp_sf_abort_violation(
 	struct sctp_packet *packet = NULL;
 	struct sctp_chunk *chunk =  arg;
 	struct sctp_chunk *abort = NULL;
-	struct net *net;
 
 	/* SCTP-AUTH, Section 6.3:
 	 *    It should be noted that if the receiver wants to tear
@@ -4310,7 +4350,6 @@ static sctp_disposition_t sctp_sf_abort_violation(
 	if (!abort)
 		goto nomem;
 
-	net = sock_net(ep->base.sk);
 	if (asoc) {
 		/* Treat INIT-ACK as a special case during COOKIE-WAIT. */
 		if (chunk->chunk_hdr->type == SCTP_CID_INIT_ACK &&
@@ -4369,7 +4408,7 @@ static sctp_disposition_t sctp_sf_abort_violation(
 	SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS);
 
 discard:
-	sctp_sf_pdiscard(ep, asoc, SCTP_ST_CHUNK(0), arg, commands);
+	sctp_sf_pdiscard(net, ep, asoc, SCTP_ST_CHUNK(0), arg, commands);
 	return SCTP_DISPOSITION_ABORT;
 
 nomem_pkt:
@@ -4398,6 +4437,7 @@ nomem:
  * Generate an  ABORT chunk and terminate the association.
  */
 static sctp_disposition_t sctp_sf_violation_chunklen(
+				     struct net *net,
 				     const struct sctp_endpoint *ep,
 				     const struct sctp_association *asoc,
 				     const sctp_subtype_t type,
@@ -4406,7 +4446,7 @@ static sctp_disposition_t sctp_sf_violation_chunklen(
 {
 	static const char err_str[]="The following chunk had invalid length:";
 
-	return sctp_sf_abort_violation(ep, asoc, arg, commands, err_str,
+	return sctp_sf_abort_violation(net, ep, asoc, arg, commands, err_str,
 					sizeof(err_str));
 }
 
@@ -4417,6 +4457,7 @@ static sctp_disposition_t sctp_sf_violation_chunklen(
  * the length is considered as invalid.
  */
 static sctp_disposition_t sctp_sf_violation_paramlen(
+				     struct net *net,
 				     const struct sctp_endpoint *ep,
 				     const struct sctp_association *asoc,
 				     const sctp_subtype_t type,
@@ -4426,7 +4467,6 @@ static sctp_disposition_t sctp_sf_violation_paramlen(
 	struct sctp_chunk *chunk =  arg;
 	struct sctp_paramhdr *param = ext;
 	struct sctp_chunk *abort = NULL;
-	struct net *net;
 
 	if (sctp_auth_recv_cid(SCTP_CID_ABORT, asoc))
 		goto discard;
@@ -4436,7 +4476,6 @@ static sctp_disposition_t sctp_sf_violation_paramlen(
 	if (!abort)
 		goto nomem;
 
-	net = sock_net(asoc->base.sk);
 	sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(abort));
 	SCTP_INC_STATS(net, SCTP_MIB_OUTCTRLCHUNKS);
 
@@ -4448,7 +4487,7 @@ static sctp_disposition_t sctp_sf_violation_paramlen(
 	SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS);
 
 discard:
-	sctp_sf_pdiscard(ep, asoc, SCTP_ST_CHUNK(0), arg, commands);
+	sctp_sf_pdiscard(net, ep, asoc, SCTP_ST_CHUNK(0), arg, commands);
 	return SCTP_DISPOSITION_ABORT;
 nomem:
 	return SCTP_DISPOSITION_NOMEM;
@@ -4461,6 +4500,7 @@ nomem:
  * error code.
  */
 static sctp_disposition_t sctp_sf_violation_ctsn(
+				     struct net *net,
 				     const struct sctp_endpoint *ep,
 				     const struct sctp_association *asoc,
 				     const sctp_subtype_t type,
@@ -4469,7 +4509,7 @@ static sctp_disposition_t sctp_sf_violation_ctsn(
 {
 	static const char err_str[]="The cumulative tsn ack beyond the max tsn currently sent:";
 
-	return sctp_sf_abort_violation(ep, asoc, arg, commands, err_str,
+	return sctp_sf_abort_violation(net, ep, asoc, arg, commands, err_str,
 					sizeof(err_str));
 }
 
@@ -4480,6 +4520,7 @@ static sctp_disposition_t sctp_sf_violation_ctsn(
  * on the path and we may not want to continue this communication.
  */
 static sctp_disposition_t sctp_sf_violation_chunk(
+				     struct net *net,
 				     const struct sctp_endpoint *ep,
 				     const struct sctp_association *asoc,
 				     const sctp_subtype_t type,
@@ -4489,9 +4530,9 @@ static sctp_disposition_t sctp_sf_violation_chunk(
 	static const char err_str[]="The following chunk violates protocol:";
 
 	if (!asoc)
-		return sctp_sf_violation(ep, asoc, type, arg, commands);
+		return sctp_sf_violation(net, ep, asoc, type, arg, commands);
 
-	return sctp_sf_abort_violation(ep, asoc, arg, commands, err_str,
+	return sctp_sf_abort_violation(net, ep, asoc, arg, commands, err_str,
 					sizeof(err_str));
 }
 /***************************************************************************
@@ -4554,7 +4595,8 @@ static sctp_disposition_t sctp_sf_violation_chunk(
  *
  * The return value is a disposition.
  */
-sctp_disposition_t sctp_sf_do_prm_asoc(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_do_prm_asoc(struct net *net,
+				       const struct sctp_endpoint *ep,
 				       const struct sctp_association *asoc,
 				       const sctp_subtype_t type,
 				       void *arg,
@@ -4665,7 +4707,8 @@ nomem:
  *
  * The return value is the disposition.
  */
-sctp_disposition_t sctp_sf_do_prm_send(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_do_prm_send(struct net *net,
+				       const struct sctp_endpoint *ep,
 				       const struct sctp_association *asoc,
 				       const sctp_subtype_t type,
 				       void *arg,
@@ -4704,6 +4747,7 @@ sctp_disposition_t sctp_sf_do_prm_send(const struct sctp_endpoint *ep,
  * The return value is the disposition.
  */
 sctp_disposition_t sctp_sf_do_9_2_prm_shutdown(
+	struct net *net,
 	const struct sctp_endpoint *ep,
 	const struct sctp_association *asoc,
 	const sctp_subtype_t type,
@@ -4725,7 +4769,7 @@ sctp_disposition_t sctp_sf_do_9_2_prm_shutdown(
 
 	disposition = SCTP_DISPOSITION_CONSUME;
 	if (sctp_outq_is_empty(&asoc->outqueue)) {
-		disposition = sctp_sf_do_9_2_start_shutdown(ep, asoc, type,
+		disposition = sctp_sf_do_9_2_start_shutdown(net, ep, asoc, type,
 							    arg, commands);
 	}
 	return disposition;
@@ -4759,6 +4803,7 @@ sctp_disposition_t sctp_sf_do_9_2_prm_shutdown(
  * The return value is the disposition.
  */
 sctp_disposition_t sctp_sf_do_9_1_prm_abort(
+	struct net *net,
 	const struct sctp_endpoint *ep,
 	const struct sctp_association *asoc,
 	const sctp_subtype_t type,
@@ -4775,7 +4820,6 @@ sctp_disposition_t sctp_sf_do_9_1_prm_abort(
 	 */
 	struct sctp_chunk *abort = arg;
 	sctp_disposition_t retval;
-	struct net *net;
 
 	retval = SCTP_DISPOSITION_CONSUME;
 
@@ -4791,7 +4835,6 @@ sctp_disposition_t sctp_sf_do_9_1_prm_abort(
 	sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED,
 			SCTP_PERR(SCTP_ERROR_USER_ABORT));
 
-	net = sock_net(asoc->base.sk);
 	SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS);
 	SCTP_DEC_STATS(net, SCTP_MIB_CURRESTAB);
 
@@ -4799,7 +4842,8 @@ sctp_disposition_t sctp_sf_do_9_1_prm_abort(
 }
 
 /* We tried an illegal operation on an association which is closed.  */
-sctp_disposition_t sctp_sf_error_closed(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_error_closed(struct net *net,
+					const struct sctp_endpoint *ep,
 					const struct sctp_association *asoc,
 					const sctp_subtype_t type,
 					void *arg,
@@ -4812,7 +4856,8 @@ sctp_disposition_t sctp_sf_error_closed(const struct sctp_endpoint *ep,
 /* We tried an illegal operation on an association which is shutting
  * down.
  */
-sctp_disposition_t sctp_sf_error_shutdown(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_error_shutdown(struct net *net,
+					  const struct sctp_endpoint *ep,
 					  const struct sctp_association *asoc,
 					  const sctp_subtype_t type,
 					  void *arg,
@@ -4838,14 +4883,13 @@ sctp_disposition_t sctp_sf_error_shutdown(const struct sctp_endpoint *ep,
  * (timers)
  */
 sctp_disposition_t sctp_sf_cookie_wait_prm_shutdown(
+	struct net *net,
 	const struct sctp_endpoint *ep,
 	const struct sctp_association *asoc,
 	const sctp_subtype_t type,
 	void *arg,
 	sctp_cmd_seq_t *commands)
 {
-	struct net *net = sock_net(asoc->base.sk);
-
 	sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP,
 			SCTP_TO(SCTP_EVENT_TIMEOUT_T1_INIT));
 
@@ -4874,6 +4918,7 @@ sctp_disposition_t sctp_sf_cookie_wait_prm_shutdown(
  * (timers)
  */
 sctp_disposition_t sctp_sf_cookie_echoed_prm_shutdown(
+	struct net *net,
 	const struct sctp_endpoint *ep,
 	const struct sctp_association *asoc,
 	const sctp_subtype_t type,
@@ -4882,7 +4927,7 @@ sctp_disposition_t sctp_sf_cookie_echoed_prm_shutdown(
 	/* There is a single T1 timer, so we should be able to use
 	 * common function with the COOKIE-WAIT state.
 	 */
-	return sctp_sf_cookie_wait_prm_shutdown(ep, asoc, type, arg, commands);
+	return sctp_sf_cookie_wait_prm_shutdown(net, ep, asoc, type, arg, commands);
 }
 
 /*
@@ -4900,6 +4945,7 @@ sctp_disposition_t sctp_sf_cookie_echoed_prm_shutdown(
  * (timers)
  */
 sctp_disposition_t sctp_sf_cookie_wait_prm_abort(
+	struct net *net,
 	const struct sctp_endpoint *ep,
 	const struct sctp_association *asoc,
 	const sctp_subtype_t type,
@@ -4908,7 +4954,6 @@ sctp_disposition_t sctp_sf_cookie_wait_prm_abort(
 {
 	struct sctp_chunk *abort = arg;
 	sctp_disposition_t retval;
-	struct net *net = sock_net(asoc->base.sk);
 
 	/* Stop T1-init timer */
 	sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP,
@@ -4950,6 +4995,7 @@ sctp_disposition_t sctp_sf_cookie_wait_prm_abort(
  * (timers)
  */
 sctp_disposition_t sctp_sf_cookie_echoed_prm_abort(
+	struct net *net,
 	const struct sctp_endpoint *ep,
 	const struct sctp_association *asoc,
 	const sctp_subtype_t type,
@@ -4959,7 +5005,7 @@ sctp_disposition_t sctp_sf_cookie_echoed_prm_abort(
 	/* There is a single T1 timer, so we should be able to use
 	 * common function with the COOKIE-WAIT state.
 	 */
-	return sctp_sf_cookie_wait_prm_abort(ep, asoc, type, arg, commands);
+	return sctp_sf_cookie_wait_prm_abort(net, ep, asoc, type, arg, commands);
 }
 
 /*
@@ -4975,6 +5021,7 @@ sctp_disposition_t sctp_sf_cookie_echoed_prm_abort(
  * (timers)
  */
 sctp_disposition_t sctp_sf_shutdown_pending_prm_abort(
+	struct net *net,
 	const struct sctp_endpoint *ep,
 	const struct sctp_association *asoc,
 	const sctp_subtype_t type,
@@ -4985,7 +5032,7 @@ sctp_disposition_t sctp_sf_shutdown_pending_prm_abort(
 	sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP,
 			SCTP_TO(SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD));
 
-	return sctp_sf_do_9_1_prm_abort(ep, asoc, type, arg, commands);
+	return sctp_sf_do_9_1_prm_abort(net, ep, asoc, type, arg, commands);
 }
 
 /*
@@ -5001,6 +5048,7 @@ sctp_disposition_t sctp_sf_shutdown_pending_prm_abort(
  * (timers)
  */
 sctp_disposition_t sctp_sf_shutdown_sent_prm_abort(
+	struct net *net,
 	const struct sctp_endpoint *ep,
 	const struct sctp_association *asoc,
 	const sctp_subtype_t type,
@@ -5015,7 +5063,7 @@ sctp_disposition_t sctp_sf_shutdown_sent_prm_abort(
 	sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP,
 			SCTP_TO(SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD));
 
-	return sctp_sf_do_9_1_prm_abort(ep, asoc, type, arg, commands);
+	return sctp_sf_do_9_1_prm_abort(net, ep, asoc, type, arg, commands);
 }
 
 /*
@@ -5031,6 +5079,7 @@ sctp_disposition_t sctp_sf_shutdown_sent_prm_abort(
  * (timers)
  */
 sctp_disposition_t sctp_sf_shutdown_ack_sent_prm_abort(
+	struct net *net,
 	const struct sctp_endpoint *ep,
 	const struct sctp_association *asoc,
 	const sctp_subtype_t type,
@@ -5040,7 +5089,7 @@ sctp_disposition_t sctp_sf_shutdown_ack_sent_prm_abort(
 	/* The same T2 timer, so we should be able to use
 	 * common function with the SHUTDOWN-SENT state.
 	 */
-	return sctp_sf_shutdown_sent_prm_abort(ep, asoc, type, arg, commands);
+	return sctp_sf_shutdown_sent_prm_abort(net, ep, asoc, type, arg, commands);
 }
 
 /*
@@ -5066,6 +5115,7 @@ sctp_disposition_t sctp_sf_shutdown_ack_sent_prm_abort(
  *   association on which a heartbeat should be issued.
  */
 sctp_disposition_t sctp_sf_do_prm_requestheartbeat(
+					struct net *net,
 					const struct sctp_endpoint *ep,
 					const struct sctp_association *asoc,
 					const sctp_subtype_t type,
@@ -5097,7 +5147,8 @@ sctp_disposition_t sctp_sf_do_prm_requestheartbeat(
  * When an endpoint has an ASCONF signaled change to be sent to the
  * remote endpoint it should do A1 to A9
  */
-sctp_disposition_t sctp_sf_do_prm_asconf(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_do_prm_asconf(struct net *net,
+					const struct sctp_endpoint *ep,
 					const struct sctp_association *asoc,
 					const sctp_subtype_t type,
 					void *arg,
@@ -5118,6 +5169,7 @@ sctp_disposition_t sctp_sf_do_prm_asconf(const struct sctp_endpoint *ep,
  * The return value is the disposition of the primitive.
  */
 sctp_disposition_t sctp_sf_ignore_primitive(
+	struct net *net,
 	const struct sctp_endpoint *ep,
 	const struct sctp_association *asoc,
 	const sctp_subtype_t type,
@@ -5139,6 +5191,7 @@ sctp_disposition_t sctp_sf_ignore_primitive(
  * retransmit, the stack will immediately send up this notification.
  */
 sctp_disposition_t sctp_sf_do_no_pending_tsn(
+	struct net *net,
 	const struct sctp_endpoint *ep,
 	const struct sctp_association *asoc,
 	const sctp_subtype_t type,
@@ -5170,6 +5223,7 @@ sctp_disposition_t sctp_sf_do_no_pending_tsn(
  * The return value is the disposition.
  */
 sctp_disposition_t sctp_sf_do_9_2_start_shutdown(
+	struct net *net,
 	const struct sctp_endpoint *ep,
 	const struct sctp_association *asoc,
 	const sctp_subtype_t type,
@@ -5239,6 +5293,7 @@ nomem:
  * The return value is the disposition.
  */
 sctp_disposition_t sctp_sf_do_9_2_shutdown_ack(
+	struct net *net,
 	const struct sctp_endpoint *ep,
 	const struct sctp_association *asoc,
 	const sctp_subtype_t type,
@@ -5257,11 +5312,11 @@ sctp_disposition_t sctp_sf_do_9_2_shutdown_ack(
 	 */
 	if (chunk) {
 		if (!sctp_vtag_verify(chunk, asoc))
-			return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+			return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
 
 		/* Make sure that the SHUTDOWN chunk has a valid length. */
 		if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_shutdown_chunk_t)))
-			return sctp_sf_violation_chunklen(ep, asoc, type, arg,
+			return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
 							  commands);
 	}
 
@@ -5309,7 +5364,8 @@ nomem:
  *
  * The return value is the disposition of the event.
  */
-sctp_disposition_t sctp_sf_ignore_other(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_ignore_other(struct net *net,
+					const struct sctp_endpoint *ep,
 					const struct sctp_association *asoc,
 					const sctp_subtype_t type,
 					void *arg,
@@ -5334,14 +5390,14 @@ sctp_disposition_t sctp_sf_ignore_other(const struct sctp_endpoint *ep,
  *
  * The return value is the disposition of the chunk.
  */
-sctp_disposition_t sctp_sf_do_6_3_3_rtx(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_do_6_3_3_rtx(struct net *net,
+					const struct sctp_endpoint *ep,
 					const struct sctp_association *asoc,
 					const sctp_subtype_t type,
 					void *arg,
 					sctp_cmd_seq_t *commands)
 {
 	struct sctp_transport *transport = arg;
-	struct net *net = sock_net(asoc->base.sk);
 
 	SCTP_INC_STATS(net, SCTP_MIB_T3_RTX_EXPIREDS);
 
@@ -5421,13 +5477,13 @@ sctp_disposition_t sctp_sf_do_6_3_3_rtx(const struct sctp_endpoint *ep,
  * allow. However, an SCTP transmitter MUST NOT be more aggressive than
  * the following algorithms allow.
  */
-sctp_disposition_t sctp_sf_do_6_2_sack(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_do_6_2_sack(struct net *net,
+				       const struct sctp_endpoint *ep,
 				       const struct sctp_association *asoc,
 				       const sctp_subtype_t type,
 				       void *arg,
 				       sctp_cmd_seq_t *commands)
 {
-	struct net *net = sock_net(asoc->base.sk);
 	SCTP_INC_STATS(net, SCTP_MIB_DELAY_SACK_EXPIREDS);
 	sctp_add_cmd_sf(commands, SCTP_CMD_GEN_SACK, SCTP_FORCE());
 	return SCTP_DISPOSITION_CONSUME;
@@ -5452,7 +5508,8 @@ sctp_disposition_t sctp_sf_do_6_2_sack(const struct sctp_endpoint *ep,
  * (timers, events)
  *
  */
-sctp_disposition_t sctp_sf_t1_init_timer_expire(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_t1_init_timer_expire(struct net *net,
+					   const struct sctp_endpoint *ep,
 					   const struct sctp_association *asoc,
 					   const sctp_subtype_t type,
 					   void *arg,
@@ -5461,7 +5518,6 @@ sctp_disposition_t sctp_sf_t1_init_timer_expire(const struct sctp_endpoint *ep,
 	struct sctp_chunk *repl = NULL;
 	struct sctp_bind_addr *bp;
 	int attempts = asoc->init_err_counter + 1;
-	struct net *net = sock_net(asoc->base.sk);
 
 	SCTP_DEBUG_PRINTK("Timer T1 expired (INIT).\n");
 	SCTP_INC_STATS(net, SCTP_MIB_T1_INIT_EXPIREDS);
@@ -5514,7 +5570,8 @@ sctp_disposition_t sctp_sf_t1_init_timer_expire(const struct sctp_endpoint *ep,
  * (timers, events)
  *
  */
-sctp_disposition_t sctp_sf_t1_cookie_timer_expire(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_t1_cookie_timer_expire(struct net *net,
+					   const struct sctp_endpoint *ep,
 					   const struct sctp_association *asoc,
 					   const sctp_subtype_t type,
 					   void *arg,
@@ -5522,7 +5579,6 @@ sctp_disposition_t sctp_sf_t1_cookie_timer_expire(const struct sctp_endpoint *ep
 {
 	struct sctp_chunk *repl = NULL;
 	int attempts = asoc->init_err_counter + 1;
-	struct net *net = sock_net(asoc->base.sk);
 
 	SCTP_DEBUG_PRINTK("Timer T1 expired (COOKIE-ECHO).\n");
 	SCTP_INC_STATS(net, SCTP_MIB_T1_COOKIE_EXPIREDS);
@@ -5563,14 +5619,14 @@ sctp_disposition_t sctp_sf_t1_cookie_timer_expire(const struct sctp_endpoint *ep
  * the T2-Shutdown timer,  giving its peer ample opportunity to transmit
  * all of its queued DATA chunks that have not yet been sent.
  */
-sctp_disposition_t sctp_sf_t2_timer_expire(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_t2_timer_expire(struct net *net,
+					   const struct sctp_endpoint *ep,
 					   const struct sctp_association *asoc,
 					   const sctp_subtype_t type,
 					   void *arg,
 					   sctp_cmd_seq_t *commands)
 {
 	struct sctp_chunk *reply = NULL;
-	struct net *net = sock_net(asoc->base.sk);
 
 	SCTP_DEBUG_PRINTK("Timer T2 expired.\n");
 	SCTP_INC_STATS(net, SCTP_MIB_T2_SHUTDOWN_EXPIREDS);
@@ -5633,6 +5689,7 @@ nomem:
  * If the T4 RTO timer expires the endpoint should do B1 to B5
  */
 sctp_disposition_t sctp_sf_t4_timer_expire(
+	struct net *net,
 	const struct sctp_endpoint *ep,
 	const struct sctp_association *asoc,
 	const sctp_subtype_t type,
@@ -5641,7 +5698,6 @@ sctp_disposition_t sctp_sf_t4_timer_expire(
 {
 	struct sctp_chunk *chunk = asoc->addip_last_asconf;
 	struct sctp_transport *transport = chunk->transport;
-	struct net *net = sock_net(asoc->base.sk);
 
 	SCTP_INC_STATS(net, SCTP_MIB_T4_RTO_EXPIREDS);
 
@@ -5704,14 +5760,14 @@ sctp_disposition_t sctp_sf_t4_timer_expire(
  * At the expiration of this timer the sender SHOULD abort the association
  * by sending an ABORT chunk.
  */
-sctp_disposition_t sctp_sf_t5_timer_expire(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_t5_timer_expire(struct net *net,
+					   const struct sctp_endpoint *ep,
 					   const struct sctp_association *asoc,
 					   const sctp_subtype_t type,
 					   void *arg,
 					   sctp_cmd_seq_t *commands)
 {
 	struct sctp_chunk *reply = NULL;
-	struct net *net = sock_net(asoc->base.sk);
 
 	SCTP_DEBUG_PRINTK("Timer T5 expired.\n");
 	SCTP_INC_STATS(net, SCTP_MIB_T5_SHUTDOWN_GUARD_EXPIREDS);
@@ -5740,13 +5796,13 @@ nomem:
  * the user.  So this routine looks same as sctp_sf_do_9_2_prm_shutdown().
  */
 sctp_disposition_t sctp_sf_autoclose_timer_expire(
+	struct net *net,
 	const struct sctp_endpoint *ep,
 	const struct sctp_association *asoc,
 	const sctp_subtype_t type,
 	void *arg,
 	sctp_cmd_seq_t *commands)
 {
-	struct net *net = sock_net(asoc->base.sk);
 	int disposition;
 
 	SCTP_INC_STATS(net, SCTP_MIB_AUTOCLOSE_EXPIREDS);
@@ -5764,7 +5820,7 @@ sctp_disposition_t sctp_sf_autoclose_timer_expire(
 
 	disposition = SCTP_DISPOSITION_CONSUME;
 	if (sctp_outq_is_empty(&asoc->outqueue)) {
-		disposition = sctp_sf_do_9_2_start_shutdown(ep, asoc, type,
+		disposition = sctp_sf_do_9_2_start_shutdown(net, ep, asoc, type,
 							    arg, commands);
 	}
 	return disposition;
@@ -5782,7 +5838,8 @@ sctp_disposition_t sctp_sf_autoclose_timer_expire(
  *
  * The return value is the disposition of the chunk.
  */
-sctp_disposition_t sctp_sf_not_impl(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_not_impl(struct net *net,
+				    const struct sctp_endpoint *ep,
 				    const struct sctp_association *asoc,
 				    const sctp_subtype_t type,
 				    void *arg,
@@ -5799,7 +5856,8 @@ sctp_disposition_t sctp_sf_not_impl(const struct sctp_endpoint *ep,
  *
  * The return value is the disposition of the chunk.
  */
-sctp_disposition_t sctp_sf_bug(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_bug(struct net *net,
+			       const struct sctp_endpoint *ep,
 			       const struct sctp_association *asoc,
 			       const sctp_subtype_t type,
 			       void *arg,
@@ -5819,7 +5877,8 @@ sctp_disposition_t sctp_sf_bug(const struct sctp_endpoint *ep,
  *
  * The return value is the disposition of the chunk.
  */
-sctp_disposition_t sctp_sf_timer_ignore(const struct sctp_endpoint *ep,
+sctp_disposition_t sctp_sf_timer_ignore(struct net *net,
+					const struct sctp_endpoint *ep,
 					const struct sctp_association *asoc,
 					const sctp_subtype_t type,
 					void *arg,
@@ -5861,7 +5920,8 @@ static struct sctp_sackhdr *sctp_sm_pull_sack(struct sctp_chunk *chunk)
 /* Create an ABORT packet to be sent as a response, with the specified
  * error causes.
  */
-static struct sctp_packet *sctp_abort_pkt_new(const struct sctp_endpoint *ep,
+static struct sctp_packet *sctp_abort_pkt_new(struct net *net,
+				  const struct sctp_endpoint *ep,
 				  const struct sctp_association *asoc,
 				  struct sctp_chunk *chunk,
 				  const void *payload,
@@ -5869,9 +5929,7 @@ static struct sctp_packet *sctp_abort_pkt_new(const struct sctp_endpoint *ep,
 {
 	struct sctp_packet *packet;
 	struct sctp_chunk *abort;
-	struct net *net;
 
-	net = sock_net(ep->base.sk);
 	packet = sctp_ootb_pkt_new(net, asoc, chunk);
 
 	if (packet) {
@@ -5984,7 +6042,8 @@ void sctp_ootb_pkt_free(struct sctp_packet *packet)
 }
 
 /* Send a stale cookie error when a invalid COOKIE ECHO chunk is found  */
-static void sctp_send_stale_cookie_err(const struct sctp_endpoint *ep,
+static void sctp_send_stale_cookie_err(struct net *net,
+				       const struct sctp_endpoint *ep,
 				       const struct sctp_association *asoc,
 				       const struct sctp_chunk *chunk,
 				       sctp_cmd_seq_t *commands,
@@ -5993,7 +6052,6 @@ static void sctp_send_stale_cookie_err(const struct sctp_endpoint *ep,
 	struct sctp_packet *packet;
 
 	if (err_chunk) {
-		struct net *net = sock_net(ep->base.sk);
 		packet = sctp_ootb_pkt_new(net, asoc, chunk);
 		if (packet) {
 			struct sctp_signed_cookie *cookie;
@@ -6027,7 +6085,7 @@ static int sctp_eat_data(const struct sctp_association *asoc,
 	__u32 tsn;
 	struct sctp_tsnmap *map = (struct sctp_tsnmap *)&asoc->peer.tsn_map;
 	struct sock *sk = asoc->base.sk;
-	struct net *net;
+	struct net *net = sock_net(sk);
 	u16 ssn;
 	u16 sid;
 	u8 ordered = 0;
@@ -6144,7 +6202,6 @@ static int sctp_eat_data(const struct sctp_association *asoc,
 	 * No User Data:  This error cause is returned to the originator of a
 	 * DATA chunk if a received DATA chunk has no user data.
 	 */
-	net = sock_net(sk);
 	if (unlikely(0 == datalen)) {
 		err = sctp_make_abort_no_data(asoc, chunk, tsn);
 		if (err) {
-- 
cgit v1.2.3


From f53b5b097e58361668b785eff9f7bcd12b4255ec Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Tue, 7 Aug 2012 07:29:08 +0000
Subject: sctp: Push struct net down into sctp_verify_ext_param

Add struct net as a parameter to sctp_verify_param so it can be passed
to sctp_verify_ext_param where struct net will be needed when the sctp
tunables become per net tunables.

Add struct net as a parameter to sctp_verify_init so struct net can be
passed to sctp_verify_param.

Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/structs.h |  6 +++---
 net/sctp/sm_make_chunk.c   | 11 ++++++-----
 net/sctp/sm_statefuns.c    |  6 +++---
 3 files changed, 12 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index b0882f3a3a51..18052b421203 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -1410,9 +1410,9 @@ struct sctp_endpoint *sctp_endpoint_is_match(struct sctp_endpoint *,
 int sctp_has_association(struct net *net, const union sctp_addr *laddr,
 			 const union sctp_addr *paddr);
 
-int sctp_verify_init(const struct sctp_association *asoc, sctp_cid_t,
-		     sctp_init_chunk_t *peer_init, struct sctp_chunk *chunk,
-		     struct sctp_chunk **err_chunk);
+int sctp_verify_init(struct net *net, const struct sctp_association *asoc,
+		     sctp_cid_t, sctp_init_chunk_t *peer_init,
+		     struct sctp_chunk *chunk, struct sctp_chunk **err_chunk);
 int sctp_process_init(struct sctp_association *, struct sctp_chunk *chunk,
 		      const union sctp_addr *peer,
 		      sctp_init_chunk_t *init, gfp_t gfp);
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index fb12835e95c2..a4b096f85a68 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -1940,7 +1940,7 @@ static int sctp_process_hn_param(const struct sctp_association *asoc,
 	return 0;
 }
 
-static int sctp_verify_ext_param(union sctp_params param)
+static int sctp_verify_ext_param(struct net *net, union sctp_params param)
 {
 	__u16 num_ext = ntohs(param.p->length) - sizeof(sctp_paramhdr_t);
 	int have_auth = 0;
@@ -2081,7 +2081,8 @@ static sctp_ierror_t sctp_process_unk_param(const struct sctp_association *asoc,
  *	SCTP_IERROR_ERROR - stop processing, trigger an ERROR
  * 	SCTP_IERROR_NO_ERROR - continue with the chunk
  */
-static sctp_ierror_t sctp_verify_param(const struct sctp_association *asoc,
+static sctp_ierror_t sctp_verify_param(struct net *net,
+					const struct sctp_association *asoc,
 					union sctp_params param,
 					sctp_cid_t cid,
 					struct sctp_chunk *chunk,
@@ -2110,7 +2111,7 @@ static sctp_ierror_t sctp_verify_param(const struct sctp_association *asoc,
 		break;
 
 	case SCTP_PARAM_SUPPORTED_EXT:
-		if (!sctp_verify_ext_param(param))
+		if (!sctp_verify_ext_param(net, param))
 			return SCTP_IERROR_ABORT;
 		break;
 
@@ -2198,7 +2199,7 @@ fallthrough:
 }
 
 /* Verify the INIT packet before we process it.  */
-int sctp_verify_init(const struct sctp_association *asoc,
+int sctp_verify_init(struct net *net, const struct sctp_association *asoc,
 		     sctp_cid_t cid,
 		     sctp_init_chunk_t *peer_init,
 		     struct sctp_chunk *chunk,
@@ -2245,7 +2246,7 @@ int sctp_verify_init(const struct sctp_association *asoc,
 	/* Verify all the variable length parameters */
 	sctp_walk_params(param, peer_init, init_hdr.params) {
 
-		result = sctp_verify_param(asoc, param, cid, chunk, errp);
+		result = sctp_verify_param(net, asoc, param, cid, chunk, errp);
 		switch (result) {
 		    case SCTP_IERROR_ABORT:
 		    case SCTP_IERROR_NOMEM:
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index 19f3bff84193..e17ada47afc4 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -364,7 +364,7 @@ sctp_disposition_t sctp_sf_do_5_1B_init(struct net *net,
 
 	/* Verify the INIT chunk before processing it. */
 	err_chunk = NULL;
-	if (!sctp_verify_init(asoc, chunk->chunk_hdr->type,
+	if (!sctp_verify_init(net, asoc, chunk->chunk_hdr->type,
 			      (sctp_init_chunk_t *)chunk->chunk_hdr, chunk,
 			      &err_chunk)) {
 		/* This chunk contains fatal error. It is to be discarded.
@@ -531,7 +531,7 @@ sctp_disposition_t sctp_sf_do_5_1C_ack(struct net *net,
 
 	/* Verify the INIT chunk before processing it. */
 	err_chunk = NULL;
-	if (!sctp_verify_init(asoc, chunk->chunk_hdr->type,
+	if (!sctp_verify_init(net, asoc, chunk->chunk_hdr->type,
 			      (sctp_init_chunk_t *)chunk->chunk_hdr, chunk,
 			      &err_chunk)) {
 
@@ -1429,7 +1429,7 @@ static sctp_disposition_t sctp_sf_do_unexpected_init(
 
 	/* Verify the INIT chunk before processing it. */
 	err_chunk = NULL;
-	if (!sctp_verify_init(asoc, chunk->chunk_hdr->type,
+	if (!sctp_verify_init(net, asoc, chunk->chunk_hdr->type,
 			      (sctp_init_chunk_t *)chunk->chunk_hdr, chunk,
 			      &err_chunk)) {
 		/* This chunk contains fatal error. It is to be discarded.
-- 
cgit v1.2.3


From e1fc3b14f9a90d9591016749289f2c3d7b35fbf4 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Tue, 7 Aug 2012 07:29:57 +0000
Subject: sctp: Make sysctl tunables per net

Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netns/sctp.h   |  90 ++++++++++++++++++++++
 include/net/sctp/structs.h | 116 ----------------------------
 net/sctp/associola.c       |  10 ++-
 net/sctp/auth.c            |  20 ++++-
 net/sctp/bind_addr.c       |   2 +-
 net/sctp/endpointola.c     |   9 ++-
 net/sctp/input.c           |   2 +-
 net/sctp/protocol.c        | 128 +++++++++++++++----------------
 net/sctp/sm_make_chunk.c   |  47 ++++++------
 net/sctp/sm_statefuns.c    |   4 +-
 net/sctp/sm_statetable.c   |   6 +-
 net/sctp/socket.c          |  65 +++++++++-------
 net/sctp/sysctl.c          | 185 +++++++++++++++++++++++----------------------
 net/sctp/transport.c       |  15 ++--
 14 files changed, 355 insertions(+), 344 deletions(-)

(limited to 'include')

diff --git a/include/net/netns/sctp.h b/include/net/netns/sctp.h
index 9576b60cbd25..5e5eb1f9f14b 100644
--- a/include/net/netns/sctp.h
+++ b/include/net/netns/sctp.h
@@ -36,6 +36,96 @@ struct netns_sctp {
 	/* Lock that protects the local_addr_list writers */
 	spinlock_t local_addr_lock;
 
+	/* RFC2960 Section 14. Suggested SCTP Protocol Parameter Values
+	 *
+	 * The following protocol parameters are RECOMMENDED:
+	 *
+	 * RTO.Initial		    - 3	 seconds
+	 * RTO.Min		    - 1	 second
+	 * RTO.Max		   -  60 seconds
+	 * RTO.Alpha		    - 1/8  (3 when converted to right shifts.)
+	 * RTO.Beta		    - 1/4  (2 when converted to right shifts.)
+	 */
+	unsigned int rto_initial;
+	unsigned int rto_min;
+	unsigned int rto_max;
+
+	/* Note: rto_alpha and rto_beta are really defined as inverse
+	 * powers of two to facilitate integer operations.
+	 */
+	int rto_alpha;
+	int rto_beta;
+
+	/* Max.Burst		    - 4 */
+	int max_burst;
+
+	/* Whether Cookie Preservative is enabled(1) or not(0) */
+	int cookie_preserve_enable;
+
+	/* Valid.Cookie.Life	    - 60  seconds  */
+	unsigned int valid_cookie_life;
+
+	/* Delayed SACK timeout  200ms default*/
+	unsigned int sack_timeout;
+
+	/* HB.interval		    - 30 seconds  */
+	unsigned int hb_interval;
+
+	/* Association.Max.Retrans  - 10 attempts
+	 * Path.Max.Retrans	    - 5	 attempts (per destination address)
+	 * Max.Init.Retransmits	    - 8	 attempts
+	 */
+	int max_retrans_association;
+	int max_retrans_path;
+	int max_retrans_init;
+	/* Potentially-Failed.Max.Retrans sysctl value
+	 * taken from:
+	 * http://tools.ietf.org/html/draft-nishida-tsvwg-sctp-failover-05
+	 */
+	int pf_retrans;
+
+	/*
+	 * Policy for preforming sctp/socket accounting
+	 * 0   - do socket level accounting, all assocs share sk_sndbuf
+	 * 1   - do sctp accounting, each asoc may use sk_sndbuf bytes
+	 */
+	int sndbuf_policy;
+
+	/*
+	 * Policy for preforming sctp/socket accounting
+	 * 0   - do socket level accounting, all assocs share sk_rcvbuf
+	 * 1   - do sctp accounting, each asoc may use sk_rcvbuf bytes
+	 */
+	int rcvbuf_policy;
+
+	int default_auto_asconf;
+
+	/* Flag to indicate if addip is enabled. */
+	int addip_enable;
+	int addip_noauth;
+
+	/* Flag to indicate if PR-SCTP is enabled. */
+	int prsctp_enable;
+
+	/* Flag to idicate if SCTP-AUTH is enabled */
+	int auth_enable;
+
+	/*
+	 * Policy to control SCTP IPv4 address scoping
+	 * 0   - Disable IPv4 address scoping
+	 * 1   - Enable IPv4 address scoping
+	 * 2   - Selectively allow only IPv4 private addresses
+	 * 3   - Selectively allow only IPv4 link local address
+	 */
+	int scope_policy;
+
+	/* Threshold for rwnd update SACKS.  Receive buffer shifted this many
+	 * bits is an indicator of when to send and window update SACK.
+	 */
+	int rwnd_upd_shift;
+
+	/* Threshold for autoclose timeout, in seconds. */
+	unsigned long max_autoclose;
 };
 
 #endif /* __NETNS_SCTP_H__ */
diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 18052b421203..0fef00f5d3ce 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -119,69 +119,6 @@ struct sctp_hashbucket {
 
 /* The SCTP globals structure. */
 extern struct sctp_globals {
-	/* RFC2960 Section 14. Suggested SCTP Protocol Parameter Values
-	 *
-	 * The following protocol parameters are RECOMMENDED:
-	 *
-	 * RTO.Initial		    - 3	 seconds
-	 * RTO.Min		    - 1	 second
-	 * RTO.Max		   -  60 seconds
-	 * RTO.Alpha		    - 1/8  (3 when converted to right shifts.)
-	 * RTO.Beta		    - 1/4  (2 when converted to right shifts.)
-	 */
-	unsigned int rto_initial;
-	unsigned int rto_min;
-	unsigned int rto_max;
-
-	/* Note: rto_alpha and rto_beta are really defined as inverse
-	 * powers of two to facilitate integer operations.
-	 */
-	int rto_alpha;
-	int rto_beta;
-
-	/* Max.Burst		    - 4 */
-	int max_burst;
-
-	/* Whether Cookie Preservative is enabled(1) or not(0) */
-	int cookie_preserve_enable;
-
-	/* Valid.Cookie.Life	    - 60  seconds  */
-	unsigned int valid_cookie_life;
-
-	/* Delayed SACK timeout  200ms default*/
-	unsigned int sack_timeout;
-
-	/* HB.interval		    - 30 seconds  */
-	unsigned int hb_interval;
-
-	/* Association.Max.Retrans  - 10 attempts
-	 * Path.Max.Retrans	    - 5	 attempts (per destination address)
-	 * Max.Init.Retransmits	    - 8	 attempts
-	 */
-	int max_retrans_association;
-	int max_retrans_path;
-	int max_retrans_init;
-
-	/* Potentially-Failed.Max.Retrans sysctl value
-	 * taken from:
-	 * http://tools.ietf.org/html/draft-nishida-tsvwg-sctp-failover-05
-	 */
-	int pf_retrans;
-
-	/*
-	 * Policy for preforming sctp/socket accounting
-	 * 0   - do socket level accounting, all assocs share sk_sndbuf
-	 * 1   - do sctp accounting, each asoc may use sk_sndbuf bytes
-	 */
-	int sndbuf_policy;
-
-	/*
-	 * Policy for preforming sctp/socket accounting
-	 * 0   - do socket level accounting, all assocs share sk_rcvbuf
-	 * 1   - do sctp accounting, each asoc may use sk_rcvbuf bytes
-	 */
-	int rcvbuf_policy;
-
 	/* The following variables are implementation specific.	 */
 
 	/* Default initialization values to be applied to new associations. */
@@ -205,56 +142,11 @@ extern struct sctp_globals {
 	int port_hashsize;
 	struct sctp_bind_hashbucket *port_hashtable;
 
-	int default_auto_asconf;
-	
-	/* Flag to indicate if addip is enabled. */
-	int addip_enable;
-	int addip_noauth_enable;
-
-	/* Flag to indicate if PR-SCTP is enabled. */
-	int prsctp_enable;
-
-	/* Flag to idicate if SCTP-AUTH is enabled */
-	int auth_enable;
-
-	/*
-	 * Policy to control SCTP IPv4 address scoping
-	 * 0   - Disable IPv4 address scoping
-	 * 1   - Enable IPv4 address scoping
-	 * 2   - Selectively allow only IPv4 private addresses
-	 * 3   - Selectively allow only IPv4 link local address
-	 */
-	int ipv4_scope_policy;
-
 	/* Flag to indicate whether computing and verifying checksum
 	 * is disabled. */
         bool checksum_disable;
-
-	/* Threshold for rwnd update SACKS.  Receive buffer shifted this many
-	 * bits is an indicator of when to send and window update SACK.
-	 */
-	int rwnd_update_shift;
-
-	/* Threshold for autoclose timeout, in seconds. */
-	unsigned long max_autoclose;
 } sctp_globals;
 
-#define sctp_rto_initial		(sctp_globals.rto_initial)
-#define sctp_rto_min			(sctp_globals.rto_min)
-#define sctp_rto_max			(sctp_globals.rto_max)
-#define sctp_rto_alpha			(sctp_globals.rto_alpha)
-#define sctp_rto_beta			(sctp_globals.rto_beta)
-#define sctp_max_burst			(sctp_globals.max_burst)
-#define sctp_valid_cookie_life		(sctp_globals.valid_cookie_life)
-#define sctp_cookie_preserve_enable	(sctp_globals.cookie_preserve_enable)
-#define sctp_max_retrans_association	(sctp_globals.max_retrans_association)
-#define sctp_sndbuf_policy	 	(sctp_globals.sndbuf_policy)
-#define sctp_rcvbuf_policy	 	(sctp_globals.rcvbuf_policy)
-#define sctp_max_retrans_path		(sctp_globals.max_retrans_path)
-#define sctp_pf_retrans			(sctp_globals.pf_retrans)
-#define sctp_max_retrans_init		(sctp_globals.max_retrans_init)
-#define sctp_sack_timeout		(sctp_globals.sack_timeout)
-#define sctp_hb_interval		(sctp_globals.hb_interval)
 #define sctp_max_instreams		(sctp_globals.max_instreams)
 #define sctp_max_outstreams		(sctp_globals.max_outstreams)
 #define sctp_address_families		(sctp_globals.address_families)
@@ -264,15 +156,7 @@ extern struct sctp_globals {
 #define sctp_assoc_hashtable		(sctp_globals.assoc_hashtable)
 #define sctp_port_hashsize		(sctp_globals.port_hashsize)
 #define sctp_port_hashtable		(sctp_globals.port_hashtable)
-#define sctp_default_auto_asconf	(sctp_globals.default_auto_asconf)
-#define sctp_scope_policy		(sctp_globals.ipv4_scope_policy)
-#define sctp_addip_enable		(sctp_globals.addip_enable)
-#define sctp_addip_noauth		(sctp_globals.addip_noauth_enable)
-#define sctp_prsctp_enable		(sctp_globals.prsctp_enable)
-#define sctp_auth_enable		(sctp_globals.auth_enable)
 #define sctp_checksum_disable		(sctp_globals.checksum_disable)
-#define sctp_rwnd_upd_shift		(sctp_globals.rwnd_update_shift)
-#define sctp_max_autoclose		(sctp_globals.max_autoclose)
 
 /* SCTP Socket type: UDP or TCP style. */
 typedef enum {
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index 93a4513c85e0..b1ef3bc301a5 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -82,6 +82,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a
 					  sctp_scope_t scope,
 					  gfp_t gfp)
 {
+	struct net *net = sock_net(sk);
 	struct sctp_sock *sp;
 	int i;
 	sctp_paramhdr_t *p;
@@ -124,7 +125,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a
 	 * socket values.
 	 */
 	asoc->max_retrans = sp->assocparams.sasoc_asocmaxrxt;
-	asoc->pf_retrans  = sctp_pf_retrans;
+	asoc->pf_retrans  = net->sctp.pf_retrans;
 
 	asoc->rto_initial = msecs_to_jiffies(sp->rtoinfo.srto_initial);
 	asoc->rto_max = msecs_to_jiffies(sp->rtoinfo.srto_max);
@@ -175,7 +176,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a
 	asoc->timeouts[SCTP_EVENT_TIMEOUT_HEARTBEAT] = 0;
 	asoc->timeouts[SCTP_EVENT_TIMEOUT_SACK] = asoc->sackdelay;
 	asoc->timeouts[SCTP_EVENT_TIMEOUT_AUTOCLOSE] =
-		min_t(unsigned long, sp->autoclose, sctp_max_autoclose) * HZ;
+		min_t(unsigned long, sp->autoclose, net->sctp.max_autoclose) * HZ;
 
 	/* Initializes the timers */
 	for (i = SCTP_EVENT_TIMEOUT_NONE; i < SCTP_NUM_TIMEOUT_TYPES; ++i)
@@ -281,7 +282,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a
 	 * and will revert old behavior.
 	 */
 	asoc->peer.asconf_capable = 0;
-	if (sctp_addip_noauth)
+	if (net->sctp.addip_noauth)
 		asoc->peer.asconf_capable = 1;
 	asoc->asconf_addr_del_pending = NULL;
 	asoc->src_out_of_asoc_ok = 0;
@@ -1418,6 +1419,7 @@ void sctp_assoc_sync_pmtu(struct sock *sk, struct sctp_association *asoc)
 /* Should we send a SACK to update our peer? */
 static inline int sctp_peer_needs_update(struct sctp_association *asoc)
 {
+	struct net *net = sock_net(asoc->base.sk);
 	switch (asoc->state) {
 	case SCTP_STATE_ESTABLISHED:
 	case SCTP_STATE_SHUTDOWN_PENDING:
@@ -1425,7 +1427,7 @@ static inline int sctp_peer_needs_update(struct sctp_association *asoc)
 	case SCTP_STATE_SHUTDOWN_SENT:
 		if ((asoc->rwnd > asoc->a_rwnd) &&
 		    ((asoc->rwnd - asoc->a_rwnd) >= max_t(__u32,
-			   (asoc->base.sk->sk_rcvbuf >> sctp_rwnd_upd_shift),
+			   (asoc->base.sk->sk_rcvbuf >> net->sctp.rwnd_upd_shift),
 			   asoc->pathmtu)))
 			return 1;
 		break;
diff --git a/net/sctp/auth.c b/net/sctp/auth.c
index bf812048cf6f..aaa6c121ecce 100644
--- a/net/sctp/auth.c
+++ b/net/sctp/auth.c
@@ -392,13 +392,14 @@ nomem:
  */
 int sctp_auth_asoc_init_active_key(struct sctp_association *asoc, gfp_t gfp)
 {
+	struct net *net = sock_net(asoc->base.sk);
 	struct sctp_auth_bytes	*secret;
 	struct sctp_shared_key *ep_key;
 
 	/* If we don't support AUTH, or peer is not capable
 	 * we don't need to do anything.
 	 */
-	if (!sctp_auth_enable || !asoc->peer.auth_capable)
+	if (!net->sctp.auth_enable || !asoc->peer.auth_capable)
 		return 0;
 
 	/* If the key_id is non-zero and we couldn't find an
@@ -445,11 +446,12 @@ struct sctp_shared_key *sctp_auth_get_shkey(
  */
 int sctp_auth_init_hmacs(struct sctp_endpoint *ep, gfp_t gfp)
 {
+	struct net *net = sock_net(ep->base.sk);
 	struct crypto_hash *tfm = NULL;
 	__u16   id;
 
 	/* if the transforms are already allocted, we are done */
-	if (!sctp_auth_enable) {
+	if (!net->sctp.auth_enable) {
 		ep->auth_hmacs = NULL;
 		return 0;
 	}
@@ -674,7 +676,12 @@ static int __sctp_auth_cid(sctp_cid_t chunk, struct sctp_chunks_param *param)
 /* Check if peer requested that this chunk is authenticated */
 int sctp_auth_send_cid(sctp_cid_t chunk, const struct sctp_association *asoc)
 {
-	if (!sctp_auth_enable || !asoc || !asoc->peer.auth_capable)
+	struct net  *net;
+	if (!asoc)
+		return 0;
+
+	net = sock_net(asoc->base.sk);
+	if (!net->sctp.auth_enable || !asoc->peer.auth_capable)
 		return 0;
 
 	return __sctp_auth_cid(chunk, asoc->peer.peer_chunks);
@@ -683,7 +690,12 @@ int sctp_auth_send_cid(sctp_cid_t chunk, const struct sctp_association *asoc)
 /* Check if we requested that peer authenticate this chunk. */
 int sctp_auth_recv_cid(sctp_cid_t chunk, const struct sctp_association *asoc)
 {
-	if (!sctp_auth_enable || !asoc)
+	struct net *net;
+	if (!asoc)
+		return 0;
+
+	net = sock_net(asoc->base.sk);
+	if (!net->sctp.auth_enable);
 		return 0;
 
 	return __sctp_auth_cid(chunk,
diff --git a/net/sctp/bind_addr.c b/net/sctp/bind_addr.c
index 23389ba44e39..d886b3bf84f5 100644
--- a/net/sctp/bind_addr.c
+++ b/net/sctp/bind_addr.c
@@ -512,7 +512,7 @@ int sctp_in_scope(struct net *net, const union sctp_addr *addr, sctp_scope_t sco
 	 * Address scoping can be selectively controlled via sysctl
 	 * option
 	 */
-	switch (sctp_scope_policy) {
+	switch (net->sctp.scope_policy) {
 	case SCTP_SCOPE_POLICY_DISABLE:
 		return 1;
 	case SCTP_SCOPE_POLICY_ENABLE:
diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c
index 8315792ef2ba..1859e2bc83d1 100644
--- a/net/sctp/endpointola.c
+++ b/net/sctp/endpointola.c
@@ -65,6 +65,7 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep,
 						struct sock *sk,
 						gfp_t gfp)
 {
+	struct net *net = sock_net(sk);
 	struct sctp_hmac_algo_param *auth_hmacs = NULL;
 	struct sctp_chunks_param *auth_chunks = NULL;
 	struct sctp_shared_key *null_key;
@@ -74,7 +75,7 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep,
 	if (!ep->digest)
 		return NULL;
 
-	if (sctp_auth_enable) {
+	if (net->sctp.auth_enable) {
 		/* Allocate space for HMACS and CHUNKS authentication
 		 * variables.  There are arrays that we encode directly
 		 * into parameters to make the rest of the operations easier.
@@ -106,7 +107,7 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep,
 		/* If the Add-IP functionality is enabled, we must
 		 * authenticate, ASCONF and ASCONF-ACK chunks
 		 */
-		if (sctp_addip_enable) {
+		if (net->sctp.addip_enable) {
 			auth_chunks->chunks[0] = SCTP_CID_ASCONF;
 			auth_chunks->chunks[1] = SCTP_CID_ASCONF_ACK;
 			auth_chunks->param_hdr.length =
@@ -140,14 +141,14 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep,
 	INIT_LIST_HEAD(&ep->asocs);
 
 	/* Use SCTP specific send buffer space queues.  */
-	ep->sndbuf_policy = sctp_sndbuf_policy;
+	ep->sndbuf_policy = net->sctp.sndbuf_policy;
 
 	sk->sk_data_ready = sctp_data_ready;
 	sk->sk_write_space = sctp_write_space;
 	sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
 
 	/* Get the receive buffer policy for this endpoint */
-	ep->rcvbuf_policy = sctp_rcvbuf_policy;
+	ep->rcvbuf_policy = net->sctp.rcvbuf_policy;
 
 	/* Initialize the secret key used with cookie. */
 	get_random_bytes(&ep->secret_key[0], SCTP_SECRET_SIZE);
diff --git a/net/sctp/input.c b/net/sctp/input.c
index a2ceb70ee06c..25dfe7380479 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -1094,7 +1094,7 @@ static struct sctp_association *__sctp_rcv_walk_lookup(struct net *net,
 			    break;
 
 		    case SCTP_CID_ASCONF:
-			    if (have_auth || sctp_addip_noauth)
+			    if (have_auth || net->sctp.addip_noauth)
 				    asoc = __sctp_rcv_asconf_lookup(
 							net, ch, laddr,
 							sctp_hdr(skb)->source,
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 59965bdea07a..2d518425d598 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -1169,6 +1169,70 @@ static int sctp_net_init(struct net *net)
 {
 	int status;
 
+	/*
+	 * 14. Suggested SCTP Protocol Parameter Values
+	 */
+	/* The following protocol parameters are RECOMMENDED:  */
+	/* RTO.Initial              - 3  seconds */
+	net->sctp.rto_initial			= SCTP_RTO_INITIAL;
+	/* RTO.Min                  - 1  second */
+	net->sctp.rto_min	 		= SCTP_RTO_MIN;
+	/* RTO.Max                 -  60 seconds */
+	net->sctp.rto_max 			= SCTP_RTO_MAX;
+	/* RTO.Alpha                - 1/8 */
+	net->sctp.rto_alpha			= SCTP_RTO_ALPHA;
+	/* RTO.Beta                 - 1/4 */
+	net->sctp.rto_beta			= SCTP_RTO_BETA;
+
+	/* Valid.Cookie.Life        - 60  seconds */
+	net->sctp.valid_cookie_life		= SCTP_DEFAULT_COOKIE_LIFE;
+
+	/* Whether Cookie Preservative is enabled(1) or not(0) */
+	net->sctp.cookie_preserve_enable 	= 1;
+
+	/* Max.Burst		    - 4 */
+	net->sctp.max_burst			= SCTP_DEFAULT_MAX_BURST;
+
+	/* Association.Max.Retrans  - 10 attempts
+	 * Path.Max.Retrans         - 5  attempts (per destination address)
+	 * Max.Init.Retransmits     - 8  attempts
+	 */
+	net->sctp.max_retrans_association	= 10;
+	net->sctp.max_retrans_path		= 5;
+	net->sctp.max_retrans_init		= 8;
+
+	/* Sendbuffer growth	    - do per-socket accounting */
+	net->sctp.sndbuf_policy			= 0;
+
+	/* Rcvbuffer growth	    - do per-socket accounting */
+	net->sctp.rcvbuf_policy			= 0;
+
+	/* HB.interval              - 30 seconds */
+	net->sctp.hb_interval			= SCTP_DEFAULT_TIMEOUT_HEARTBEAT;
+
+	/* delayed SACK timeout */
+	net->sctp.sack_timeout			= SCTP_DEFAULT_TIMEOUT_SACK;
+
+	/* Disable ADDIP by default. */
+	net->sctp.addip_enable = 0;
+	net->sctp.addip_noauth = 0;
+	net->sctp.default_auto_asconf = 0;
+
+	/* Enable PR-SCTP by default. */
+	net->sctp.prsctp_enable = 1;
+
+	/* Disable AUTH by default. */
+	net->sctp.auth_enable = 0;
+
+	/* Set SCOPE policy to enabled */
+	net->sctp.scope_policy = SCTP_SCOPE_POLICY_ENABLE;
+
+	/* Set the default rwnd update threshold */
+	net->sctp.rwnd_upd_shift = SCTP_DEFAULT_RWND_SHIFT;
+
+	/* Initialize maximum autoclose timeout. */
+	net->sctp.max_autoclose		= INT_MAX / HZ;
+
 	status = sctp_sysctl_net_register(net);
 	if (status)
 		goto err_sysctl_register;
@@ -1272,59 +1336,12 @@ SCTP_STATIC __init int sctp_init(void)
 	if (status)
 		goto err_percpu_counter_init;
 
-	/*
-	 * 14. Suggested SCTP Protocol Parameter Values
-	 */
-	/* The following protocol parameters are RECOMMENDED:  */
-	/* RTO.Initial              - 3  seconds */
-	sctp_rto_initial		= SCTP_RTO_INITIAL;
-	/* RTO.Min                  - 1  second */
-	sctp_rto_min	 		= SCTP_RTO_MIN;
-	/* RTO.Max                 -  60 seconds */
-	sctp_rto_max 			= SCTP_RTO_MAX;
-	/* RTO.Alpha                - 1/8 */
-	sctp_rto_alpha	        	= SCTP_RTO_ALPHA;
-	/* RTO.Beta                 - 1/4 */
-	sctp_rto_beta			= SCTP_RTO_BETA;
-
-	/* Valid.Cookie.Life        - 60  seconds */
-	sctp_valid_cookie_life		= SCTP_DEFAULT_COOKIE_LIFE;
-
-	/* Whether Cookie Preservative is enabled(1) or not(0) */
-	sctp_cookie_preserve_enable 	= 1;
-
-	/* Max.Burst		    - 4 */
-	sctp_max_burst 			= SCTP_DEFAULT_MAX_BURST;
-
-	/* Association.Max.Retrans  - 10 attempts
-	 * Path.Max.Retrans         - 5  attempts (per destination address)
-	 * Max.Init.Retransmits     - 8  attempts
-	 */
-	sctp_max_retrans_association 	= 10;
-	sctp_max_retrans_path		= 5;
-	sctp_max_retrans_init		= 8;
-
-	/* Sendbuffer growth	    - do per-socket accounting */
-	sctp_sndbuf_policy		= 0;
-
-	/* Rcvbuffer growth	    - do per-socket accounting */
-	sctp_rcvbuf_policy		= 0;
-
-	/* HB.interval              - 30 seconds */
-	sctp_hb_interval		= SCTP_DEFAULT_TIMEOUT_HEARTBEAT;
-
-	/* delayed SACK timeout */
-	sctp_sack_timeout		= SCTP_DEFAULT_TIMEOUT_SACK;
-
 	/* Implementation specific variables. */
 
 	/* Initialize default stream count setup information. */
 	sctp_max_instreams    		= SCTP_DEFAULT_INSTREAMS;
 	sctp_max_outstreams   		= SCTP_DEFAULT_OUTSTREAMS;
 
-	/* Initialize maximum autoclose timeout. */
-	sctp_max_autoclose		= INT_MAX / HZ;
-
 	/* Initialize handle used for association ids. */
 	idr_init(&sctp_assocs_id);
 
@@ -1411,23 +1428,6 @@ SCTP_STATIC __init int sctp_init(void)
 	pr_info("Hash tables configured (established %d bind %d)\n",
 		sctp_assoc_hashsize, sctp_port_hashsize);
 
-	/* Disable ADDIP by default. */
-	sctp_addip_enable = 0;
-	sctp_addip_noauth = 0;
-	sctp_default_auto_asconf = 0;
-
-	/* Enable PR-SCTP by default. */
-	sctp_prsctp_enable = 1;
-
-	/* Disable AUTH by default. */
-	sctp_auth_enable = 0;
-
-	/* Set SCOPE policy to enabled */
-	sctp_scope_policy = SCTP_SCOPE_POLICY_ENABLE;
-
-	/* Set the default rwnd update threshold */
-	sctp_rwnd_upd_shift		= SCTP_DEFAULT_RWND_SHIFT;
-
 	sctp_sysctl_register();
 
 	INIT_LIST_HEAD(&sctp_address_families);
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index a4b096f85a68..fbe1636309a7 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -198,6 +198,7 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc,
 			     const struct sctp_bind_addr *bp,
 			     gfp_t gfp, int vparam_len)
 {
+	struct net *net = sock_net(asoc->base.sk);
 	sctp_inithdr_t init;
 	union sctp_params addrs;
 	size_t chunksize;
@@ -237,7 +238,7 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc,
 	chunksize += WORD_ROUND(SCTP_SAT_LEN(num_types));
 	chunksize += sizeof(ecap_param);
 
-	if (sctp_prsctp_enable)
+	if (net->sctp.prsctp_enable)
 		chunksize += sizeof(prsctp_param);
 
 	/* ADDIP: Section 4.2.7:
@@ -245,7 +246,7 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc,
 	 *  the ASCONF,the ASCONF-ACK, and the AUTH  chunks in its INIT and
 	 *  INIT-ACK parameters.
 	 */
-	if (sctp_addip_enable) {
+	if (net->sctp.addip_enable) {
 		extensions[num_ext] = SCTP_CID_ASCONF;
 		extensions[num_ext+1] = SCTP_CID_ASCONF_ACK;
 		num_ext += 2;
@@ -257,7 +258,7 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc,
 	chunksize += vparam_len;
 
 	/* Account for AUTH related parameters */
-	if (sctp_auth_enable) {
+	if (net->sctp.auth_enable) {
 		/* Add random parameter length*/
 		chunksize += sizeof(asoc->c.auth_random);
 
@@ -331,7 +332,7 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc,
 		sctp_addto_param(retval, num_ext, extensions);
 	}
 
-	if (sctp_prsctp_enable)
+	if (net->sctp.prsctp_enable)
 		sctp_addto_chunk(retval, sizeof(prsctp_param), &prsctp_param);
 
 	if (sp->adaptation_ind) {
@@ -342,7 +343,7 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc,
 	}
 
 	/* Add SCTP-AUTH chunks to the parameter list */
-	if (sctp_auth_enable) {
+	if (net->sctp.auth_enable) {
 		sctp_addto_chunk(retval, sizeof(asoc->c.auth_random),
 				 asoc->c.auth_random);
 		if (auth_hmacs)
@@ -1964,10 +1965,10 @@ static int sctp_verify_ext_param(struct net *net, union sctp_params param)
 	 * only if ADD-IP is turned on and we are not backward-compatible
 	 * mode.
 	 */
-	if (sctp_addip_noauth)
+	if (net->sctp.addip_noauth)
 		return 1;
 
-	if (sctp_addip_enable && !have_auth && have_asconf)
+	if (net->sctp.addip_enable && !have_auth && have_asconf)
 		return 0;
 
 	return 1;
@@ -1976,13 +1977,14 @@ static int sctp_verify_ext_param(struct net *net, union sctp_params param)
 static void sctp_process_ext_param(struct sctp_association *asoc,
 				    union sctp_params param)
 {
+	struct net *net = sock_net(asoc->base.sk);
 	__u16 num_ext = ntohs(param.p->length) - sizeof(sctp_paramhdr_t);
 	int i;
 
 	for (i = 0; i < num_ext; i++) {
 		switch (param.ext->chunks[i]) {
 		    case SCTP_CID_FWD_TSN:
-			    if (sctp_prsctp_enable &&
+			    if (net->sctp.prsctp_enable &&
 				!asoc->peer.prsctp_capable)
 				    asoc->peer.prsctp_capable = 1;
 			    break;
@@ -1990,12 +1992,12 @@ static void sctp_process_ext_param(struct sctp_association *asoc,
 			    /* if the peer reports AUTH, assume that he
 			     * supports AUTH.
 			     */
-			    if (sctp_auth_enable)
+			    if (net->sctp.auth_enable)
 				    asoc->peer.auth_capable = 1;
 			    break;
 		    case SCTP_CID_ASCONF:
 		    case SCTP_CID_ASCONF_ACK:
-			    if (sctp_addip_enable)
+			    if (net->sctp.addip_enable)
 				    asoc->peer.asconf_capable = 1;
 			    break;
 		    default:
@@ -2116,7 +2118,7 @@ static sctp_ierror_t sctp_verify_param(struct net *net,
 		break;
 
 	case SCTP_PARAM_SET_PRIMARY:
-		if (sctp_addip_enable)
+		if (net->sctp.addip_enable)
 			break;
 		goto fallthrough;
 
@@ -2127,12 +2129,12 @@ static sctp_ierror_t sctp_verify_param(struct net *net,
 		break;
 
 	case SCTP_PARAM_FWD_TSN_SUPPORT:
-		if (sctp_prsctp_enable)
+		if (net->sctp.prsctp_enable)
 			break;
 		goto fallthrough;
 
 	case SCTP_PARAM_RANDOM:
-		if (!sctp_auth_enable)
+		if (!net->sctp.auth_enable)
 			goto fallthrough;
 
 		/* SCTP-AUTH: Secion 6.1
@@ -2149,7 +2151,7 @@ static sctp_ierror_t sctp_verify_param(struct net *net,
 		break;
 
 	case SCTP_PARAM_CHUNKS:
-		if (!sctp_auth_enable)
+		if (!net->sctp.auth_enable)
 			goto fallthrough;
 
 		/* SCTP-AUTH: Section 3.2
@@ -2165,7 +2167,7 @@ static sctp_ierror_t sctp_verify_param(struct net *net,
 		break;
 
 	case SCTP_PARAM_HMAC_ALGO:
-		if (!sctp_auth_enable)
+		if (!net->sctp.auth_enable)
 			goto fallthrough;
 
 		hmacs = (struct sctp_hmac_algo_param *)param.p;
@@ -2271,6 +2273,7 @@ int sctp_process_init(struct sctp_association *asoc, struct sctp_chunk *chunk,
 		      const union sctp_addr *peer_addr,
 		      sctp_init_chunk_t *peer_init, gfp_t gfp)
 {
+	struct net *net = sock_net(asoc->base.sk);
 	union sctp_params param;
 	struct sctp_transport *transport;
 	struct list_head *pos, *temp;
@@ -2327,7 +2330,7 @@ int sctp_process_init(struct sctp_association *asoc, struct sctp_chunk *chunk,
 	 * also give us an option to silently ignore the packet, which
 	 * is what we'll do here.
 	 */
-	if (!sctp_addip_noauth &&
+	if (!net->sctp.addip_noauth &&
 	     (asoc->peer.asconf_capable && !asoc->peer.auth_capable)) {
 		asoc->peer.addip_disabled_mask |= (SCTP_PARAM_ADD_IP |
 						  SCTP_PARAM_DEL_IP |
@@ -2502,7 +2505,7 @@ do_addr_param:
 		break;
 
 	case SCTP_PARAM_COOKIE_PRESERVATIVE:
-		if (!sctp_cookie_preserve_enable)
+		if (!net->sctp.cookie_preserve_enable)
 			break;
 
 		stale = ntohl(param.life->lifespan_increment);
@@ -2582,7 +2585,7 @@ do_addr_param:
 		break;
 
 	case SCTP_PARAM_SET_PRIMARY:
-		if (!sctp_addip_enable)
+		if (!net->sctp.addip_enable)
 			goto fall_through;
 
 		addr_param = param.v + sizeof(sctp_addip_param_t);
@@ -2609,7 +2612,7 @@ do_addr_param:
 		break;
 
 	case SCTP_PARAM_FWD_TSN_SUPPORT:
-		if (sctp_prsctp_enable) {
+		if (net->sctp.prsctp_enable) {
 			asoc->peer.prsctp_capable = 1;
 			break;
 		}
@@ -2617,7 +2620,7 @@ do_addr_param:
 		goto fall_through;
 
 	case SCTP_PARAM_RANDOM:
-		if (!sctp_auth_enable)
+		if (!net->sctp.auth_enable)
 			goto fall_through;
 
 		/* Save peer's random parameter */
@@ -2630,7 +2633,7 @@ do_addr_param:
 		break;
 
 	case SCTP_PARAM_HMAC_ALGO:
-		if (!sctp_auth_enable)
+		if (!net->sctp.auth_enable)
 			goto fall_through;
 
 		/* Save peer's HMAC list */
@@ -2646,7 +2649,7 @@ do_addr_param:
 		break;
 
 	case SCTP_PARAM_CHUNKS:
-		if (!sctp_auth_enable)
+		if (!net->sctp.auth_enable)
 			goto fall_through;
 
 		asoc->peer.peer_chunks = kmemdup(param.p,
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index e17ada47afc4..094813b6c3c3 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -3586,7 +3586,7 @@ sctp_disposition_t sctp_sf_do_asconf(struct net *net,
 	 * is received unauthenticated it MUST be silently discarded as
 	 * described in [I-D.ietf-tsvwg-sctp-auth].
 	 */
-	if (!sctp_addip_noauth && !chunk->auth)
+	if (!net->sctp.addip_noauth && !chunk->auth)
 		return sctp_sf_discard_chunk(net, ep, asoc, type, arg, commands);
 
 	/* Make sure that the ASCONF ADDIP chunk has a valid length.  */
@@ -3713,7 +3713,7 @@ sctp_disposition_t sctp_sf_do_asconf_ack(struct net *net,
 	 * is received unauthenticated it MUST be silently discarded as
 	 * described in [I-D.ietf-tsvwg-sctp-auth].
 	 */
-	if (!sctp_addip_noauth && !asconf_ack->auth)
+	if (!net->sctp.addip_noauth && !asconf_ack->auth)
 		return sctp_sf_discard_chunk(net, ep, asoc, type, arg, commands);
 
 	/* Make sure that the ADDIP chunk has a valid length.  */
diff --git a/net/sctp/sm_statetable.c b/net/sctp/sm_statetable.c
index 4a029d798287..84d98d8a5a74 100644
--- a/net/sctp/sm_statetable.c
+++ b/net/sctp/sm_statetable.c
@@ -918,12 +918,12 @@ static const sctp_sm_table_entry_t *sctp_chunk_event_lookup(struct net *net,
 	if (cid <= SCTP_CID_BASE_MAX)
 		return &chunk_event_table[cid][state];
 
-	if (sctp_prsctp_enable) {
+	if (net->sctp.prsctp_enable) {
 		if (cid == SCTP_CID_FWD_TSN)
 			return &prsctp_chunk_event_table[0][state];
 	}
 
-	if (sctp_addip_enable) {
+	if (net->sctp.addip_enable) {
 		if (cid == SCTP_CID_ASCONF)
 			return &addip_chunk_event_table[0][state];
 
@@ -931,7 +931,7 @@ static const sctp_sm_table_entry_t *sctp_chunk_event_lookup(struct net *net,
 			return &addip_chunk_event_table[1][state];
 	}
 
-	if (sctp_auth_enable) {
+	if (net->sctp.auth_enable) {
 		if (cid == SCTP_CID_AUTH)
 			return &auth_chunk_event_table[0][state];
 	}
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index a6a4226a922f..d37d24ff197f 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -516,6 +516,7 @@ static int sctp_send_asconf_add_ip(struct sock		*sk,
 				   struct sockaddr	*addrs,
 				   int 			addrcnt)
 {
+	struct net *net = sock_net(sk);
 	struct sctp_sock		*sp;
 	struct sctp_endpoint		*ep;
 	struct sctp_association		*asoc;
@@ -530,7 +531,7 @@ static int sctp_send_asconf_add_ip(struct sock		*sk,
 	int 				i;
 	int 				retval = 0;
 
-	if (!sctp_addip_enable)
+	if (!net->sctp.addip_enable)
 		return retval;
 
 	sp = sctp_sk(sk);
@@ -718,6 +719,7 @@ static int sctp_send_asconf_del_ip(struct sock		*sk,
 				   struct sockaddr	*addrs,
 				   int			addrcnt)
 {
+	struct net *net = sock_net(sk);
 	struct sctp_sock	*sp;
 	struct sctp_endpoint	*ep;
 	struct sctp_association	*asoc;
@@ -733,7 +735,7 @@ static int sctp_send_asconf_del_ip(struct sock		*sk,
 	int			stored = 0;
 
 	chunk = NULL;
-	if (!sctp_addip_enable)
+	if (!net->sctp.addip_enable)
 		return retval;
 
 	sp = sctp_sk(sk);
@@ -3039,6 +3041,7 @@ static int sctp_setsockopt_maxseg(struct sock *sk, char __user *optval, unsigned
 static int sctp_setsockopt_peer_primary_addr(struct sock *sk, char __user *optval,
 					     unsigned int optlen)
 {
+	struct net *net = sock_net(sk);
 	struct sctp_sock	*sp;
 	struct sctp_association	*asoc = NULL;
 	struct sctp_setpeerprim	prim;
@@ -3048,7 +3051,7 @@ static int sctp_setsockopt_peer_primary_addr(struct sock *sk, char __user *optva
 
 	sp = sctp_sk(sk);
 
-	if (!sctp_addip_enable)
+	if (!net->sctp.addip_enable)
 		return -EPERM;
 
 	if (optlen != sizeof(struct sctp_setpeerprim))
@@ -3285,9 +3288,10 @@ static int sctp_setsockopt_auth_chunk(struct sock *sk,
 				      char __user *optval,
 				      unsigned int optlen)
 {
+	struct net *net = sock_net(sk);
 	struct sctp_authchunk val;
 
-	if (!sctp_auth_enable)
+	if (!net->sctp.auth_enable)
 		return -EACCES;
 
 	if (optlen != sizeof(struct sctp_authchunk))
@@ -3317,11 +3321,12 @@ static int sctp_setsockopt_hmac_ident(struct sock *sk,
 				      char __user *optval,
 				      unsigned int optlen)
 {
+	struct net *net = sock_net(sk);
 	struct sctp_hmacalgo *hmacs;
 	u32 idents;
 	int err;
 
-	if (!sctp_auth_enable)
+	if (!net->sctp.auth_enable)
 		return -EACCES;
 
 	if (optlen < sizeof(struct sctp_hmacalgo))
@@ -3354,11 +3359,12 @@ static int sctp_setsockopt_auth_key(struct sock *sk,
 				    char __user *optval,
 				    unsigned int optlen)
 {
+	struct net *net = sock_net(sk);
 	struct sctp_authkey *authkey;
 	struct sctp_association *asoc;
 	int ret;
 
-	if (!sctp_auth_enable)
+	if (!net->sctp.auth_enable)
 		return -EACCES;
 
 	if (optlen <= sizeof(struct sctp_authkey))
@@ -3395,10 +3401,11 @@ static int sctp_setsockopt_active_key(struct sock *sk,
 				      char __user *optval,
 				      unsigned int optlen)
 {
+	struct net *net = sock_net(sk);
 	struct sctp_authkeyid val;
 	struct sctp_association *asoc;
 
-	if (!sctp_auth_enable)
+	if (!net->sctp.auth_enable)
 		return -EACCES;
 
 	if (optlen != sizeof(struct sctp_authkeyid))
@@ -3423,10 +3430,11 @@ static int sctp_setsockopt_del_key(struct sock *sk,
 				   char __user *optval,
 				   unsigned int optlen)
 {
+	struct net *net = sock_net(sk);
 	struct sctp_authkeyid val;
 	struct sctp_association *asoc;
 
-	if (!sctp_auth_enable)
+	if (!net->sctp.auth_enable)
 		return -EACCES;
 
 	if (optlen != sizeof(struct sctp_authkeyid))
@@ -3849,6 +3857,7 @@ out:
  */
 SCTP_STATIC int sctp_init_sock(struct sock *sk)
 {
+	struct net *net = sock_net(sk);
 	struct sctp_endpoint *ep;
 	struct sctp_sock *sp;
 
@@ -3878,7 +3887,7 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk)
 	sp->default_timetolive = 0;
 
 	sp->default_rcv_context = 0;
-	sp->max_burst = sctp_max_burst;
+	sp->max_burst = net->sctp.max_burst;
 
 	/* Initialize default setup parameters. These parameters
 	 * can be modified with the SCTP_INITMSG socket option or
@@ -3886,24 +3895,24 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk)
 	 */
 	sp->initmsg.sinit_num_ostreams   = sctp_max_outstreams;
 	sp->initmsg.sinit_max_instreams  = sctp_max_instreams;
-	sp->initmsg.sinit_max_attempts   = sctp_max_retrans_init;
-	sp->initmsg.sinit_max_init_timeo = sctp_rto_max;
+	sp->initmsg.sinit_max_attempts   = net->sctp.max_retrans_init;
+	sp->initmsg.sinit_max_init_timeo = net->sctp.rto_max;
 
 	/* Initialize default RTO related parameters.  These parameters can
 	 * be modified for with the SCTP_RTOINFO socket option.
 	 */
-	sp->rtoinfo.srto_initial = sctp_rto_initial;
-	sp->rtoinfo.srto_max     = sctp_rto_max;
-	sp->rtoinfo.srto_min     = sctp_rto_min;
+	sp->rtoinfo.srto_initial = net->sctp.rto_initial;
+	sp->rtoinfo.srto_max     = net->sctp.rto_max;
+	sp->rtoinfo.srto_min     = net->sctp.rto_min;
 
 	/* Initialize default association related parameters. These parameters
 	 * can be modified with the SCTP_ASSOCINFO socket option.
 	 */
-	sp->assocparams.sasoc_asocmaxrxt = sctp_max_retrans_association;
+	sp->assocparams.sasoc_asocmaxrxt = net->sctp.max_retrans_association;
 	sp->assocparams.sasoc_number_peer_destinations = 0;
 	sp->assocparams.sasoc_peer_rwnd = 0;
 	sp->assocparams.sasoc_local_rwnd = 0;
-	sp->assocparams.sasoc_cookie_life = sctp_valid_cookie_life;
+	sp->assocparams.sasoc_cookie_life = net->sctp.valid_cookie_life;
 
 	/* Initialize default event subscriptions. By default, all the
 	 * options are off.
@@ -3913,10 +3922,10 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk)
 	/* Default Peer Address Parameters.  These defaults can
 	 * be modified via SCTP_PEER_ADDR_PARAMS
 	 */
-	sp->hbinterval  = sctp_hb_interval;
-	sp->pathmaxrxt  = sctp_max_retrans_path;
+	sp->hbinterval  = net->sctp.hb_interval;
+	sp->pathmaxrxt  = net->sctp.max_retrans_path;
 	sp->pathmtu     = 0; // allow default discovery
-	sp->sackdelay   = sctp_sack_timeout;
+	sp->sackdelay   = net->sctp.sack_timeout;
 	sp->sackfreq	= 2;
 	sp->param_flags = SPP_HB_ENABLE |
 			  SPP_PMTUD_ENABLE |
@@ -3967,10 +3976,10 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk)
 
 	local_bh_disable();
 	percpu_counter_inc(&sctp_sockets_allocated);
-	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
-	if (sctp_default_auto_asconf) {
+	sock_prot_inuse_add(net, sk->sk_prot, 1);
+	if (net->sctp.default_auto_asconf) {
 		list_add_tail(&sp->auto_asconf_list,
-		    &sock_net(sk)->sctp.auto_asconf_splist);
+		    &net->sctp.auto_asconf_splist);
 		sp->do_auto_asconf = 1;
 	} else
 		sp->do_auto_asconf = 0;
@@ -5307,12 +5316,13 @@ static int sctp_getsockopt_maxburst(struct sock *sk, int len,
 static int sctp_getsockopt_hmac_ident(struct sock *sk, int len,
 				    char __user *optval, int __user *optlen)
 {
+	struct net *net = sock_net(sk);
 	struct sctp_hmacalgo  __user *p = (void __user *)optval;
 	struct sctp_hmac_algo_param *hmacs;
 	__u16 data_len = 0;
 	u32 num_idents;
 
-	if (!sctp_auth_enable)
+	if (!net->sctp.auth_enable)
 		return -EACCES;
 
 	hmacs = sctp_sk(sk)->ep->auth_hmacs_list;
@@ -5336,10 +5346,11 @@ static int sctp_getsockopt_hmac_ident(struct sock *sk, int len,
 static int sctp_getsockopt_active_key(struct sock *sk, int len,
 				    char __user *optval, int __user *optlen)
 {
+	struct net *net = sock_net(sk);
 	struct sctp_authkeyid val;
 	struct sctp_association *asoc;
 
-	if (!sctp_auth_enable)
+	if (!net->sctp.auth_enable)
 		return -EACCES;
 
 	if (len < sizeof(struct sctp_authkeyid))
@@ -5368,6 +5379,7 @@ static int sctp_getsockopt_active_key(struct sock *sk, int len,
 static int sctp_getsockopt_peer_auth_chunks(struct sock *sk, int len,
 				    char __user *optval, int __user *optlen)
 {
+	struct net *net = sock_net(sk);
 	struct sctp_authchunks __user *p = (void __user *)optval;
 	struct sctp_authchunks val;
 	struct sctp_association *asoc;
@@ -5375,7 +5387,7 @@ static int sctp_getsockopt_peer_auth_chunks(struct sock *sk, int len,
 	u32    num_chunks = 0;
 	char __user *to;
 
-	if (!sctp_auth_enable)
+	if (!net->sctp.auth_enable)
 		return -EACCES;
 
 	if (len < sizeof(struct sctp_authchunks))
@@ -5411,6 +5423,7 @@ num:
 static int sctp_getsockopt_local_auth_chunks(struct sock *sk, int len,
 				    char __user *optval, int __user *optlen)
 {
+	struct net *net = sock_net(sk);
 	struct sctp_authchunks __user *p = (void __user *)optval;
 	struct sctp_authchunks val;
 	struct sctp_association *asoc;
@@ -5418,7 +5431,7 @@ static int sctp_getsockopt_local_auth_chunks(struct sock *sk, int len,
 	u32    num_chunks = 0;
 	char __user *to;
 
-	if (!sctp_auth_enable)
+	if (!net->sctp.auth_enable)
 		return -EACCES;
 
 	if (len < sizeof(struct sctp_authchunks))
diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c
index bee36c408dde..70e3ba5cb50b 100644
--- a/net/sctp/sysctl.c
+++ b/net/sctp/sysctl.c
@@ -63,9 +63,35 @@ extern int sysctl_sctp_rmem[3];
 extern int sysctl_sctp_wmem[3];
 
 static ctl_table sctp_table[] = {
+	{
+		.procname	= "sctp_mem",
+		.data		= &sysctl_sctp_mem,
+		.maxlen		= sizeof(sysctl_sctp_mem),
+		.mode		= 0644,
+		.proc_handler	= proc_doulongvec_minmax
+	},
+	{
+		.procname	= "sctp_rmem",
+		.data		= &sysctl_sctp_rmem,
+		.maxlen		= sizeof(sysctl_sctp_rmem),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+	{
+		.procname	= "sctp_wmem",
+		.data		= &sysctl_sctp_wmem,
+		.maxlen		= sizeof(sysctl_sctp_wmem),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+
+	{ /* sentinel */ }
+};
+
+static ctl_table sctp_net_table[] = {
 	{
 		.procname	= "rto_initial",
-		.data		= &sctp_rto_initial,
+		.data		= &init_net.sctp.rto_initial,
 		.maxlen		= sizeof(unsigned int),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec_minmax,
@@ -74,7 +100,7 @@ static ctl_table sctp_table[] = {
 	},
 	{
 		.procname	= "rto_min",
-		.data		= &sctp_rto_min,
+		.data		= &init_net.sctp.rto_min,
 		.maxlen		= sizeof(unsigned int),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec_minmax,
@@ -83,7 +109,7 @@ static ctl_table sctp_table[] = {
 	},
 	{
 		.procname	= "rto_max",
-		.data		= &sctp_rto_max,
+		.data		= &init_net.sctp.rto_max,
 		.maxlen		= sizeof(unsigned int),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec_minmax,
@@ -91,17 +117,22 @@ static ctl_table sctp_table[] = {
 		.extra2         = &timer_max
 	},
 	{
-		.procname	= "valid_cookie_life",
-		.data		= &sctp_valid_cookie_life,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1         = &one,
-		.extra2         = &timer_max
+		.procname	= "rto_alpha_exp_divisor",
+		.data		= &init_net.sctp.rto_alpha,
+		.maxlen		= sizeof(int),
+		.mode		= 0444,
+		.proc_handler	= proc_dointvec,
+	},
+	{
+		.procname	= "rto_beta_exp_divisor",
+		.data		= &init_net.sctp.rto_beta,
+		.maxlen		= sizeof(int),
+		.mode		= 0444,
+		.proc_handler	= proc_dointvec,
 	},
 	{
 		.procname	= "max_burst",
-		.data		= &sctp_max_burst,
+		.data		= &init_net.sctp.max_burst,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec_minmax,
@@ -109,31 +140,42 @@ static ctl_table sctp_table[] = {
 		.extra2		= &int_max
 	},
 	{
-		.procname	= "association_max_retrans",
-		.data		= &sctp_max_retrans_association,
+		.procname	= "cookie_preserve_enable",
+		.data		= &init_net.sctp.cookie_preserve_enable,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+	{
+		.procname	= "valid_cookie_life",
+		.data		= &init_net.sctp.valid_cookie_life,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
 		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &one,
-		.extra2		= &int_max
+		.extra1         = &one,
+		.extra2         = &timer_max
 	},
 	{
-		.procname	= "sndbuf_policy",
-		.data		= &sctp_sndbuf_policy,
+		.procname	= "sack_timeout",
+		.data		= &init_net.sctp.sack_timeout,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1         = &sack_timer_min,
+		.extra2         = &sack_timer_max,
 	},
 	{
-		.procname	= "rcvbuf_policy",
-		.data		= &sctp_rcvbuf_policy,
-		.maxlen		= sizeof(int),
+		.procname	= "hb_interval",
+		.data		= &init_net.sctp.hb_interval,
+		.maxlen		= sizeof(unsigned int),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1         = &one,
+		.extra2         = &timer_max
 	},
 	{
-		.procname	= "path_max_retrans",
-		.data		= &sctp_max_retrans_path,
+		.procname	= "association_max_retrans",
+		.data		= &init_net.sctp.max_retrans_association,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec_minmax,
@@ -141,17 +183,17 @@ static ctl_table sctp_table[] = {
 		.extra2		= &int_max
 	},
 	{
-		.procname	= "pf_retrans",
-		.data		= &sctp_pf_retrans,
+		.procname	= "path_max_retrans",
+		.data		= &init_net.sctp.max_retrans_path,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &zero,
+		.extra1		= &one,
 		.extra2		= &int_max
 	},
 	{
 		.procname	= "max_init_retransmits",
-		.data		= &sctp_max_retrans_init,
+		.data		= &init_net.sctp.max_retrans_init,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec_minmax,
@@ -159,103 +201,66 @@ static ctl_table sctp_table[] = {
 		.extra2		= &int_max
 	},
 	{
-		.procname	= "hb_interval",
-		.data		= &sctp_hb_interval,
-		.maxlen		= sizeof(unsigned int),
+		.procname	= "pf_retrans",
+		.data		= &init_net.sctp.pf_retrans,
+		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec_minmax,
-		.extra1         = &one,
-		.extra2         = &timer_max
+		.extra1		= &zero,
+		.extra2		= &int_max
 	},
 	{
-		.procname	= "cookie_preserve_enable",
-		.data		= &sctp_cookie_preserve_enable,
+		.procname	= "sndbuf_policy",
+		.data		= &init_net.sctp.sndbuf_policy,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec,
 	},
 	{
-		.procname	= "rto_alpha_exp_divisor",
-		.data		= &sctp_rto_alpha,
-		.maxlen		= sizeof(int),
-		.mode		= 0444,
-		.proc_handler	= proc_dointvec,
-	},
-	{
-		.procname	= "rto_beta_exp_divisor",
-		.data		= &sctp_rto_beta,
-		.maxlen		= sizeof(int),
-		.mode		= 0444,
-		.proc_handler	= proc_dointvec,
-	},
-	{
-		.procname	= "addip_enable",
-		.data		= &sctp_addip_enable,
+		.procname	= "rcvbuf_policy",
+		.data		= &init_net.sctp.rcvbuf_policy,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec,
 	},
 	{
 		.procname	= "default_auto_asconf",
-		.data		= &sctp_default_auto_asconf,
+		.data		= &init_net.sctp.default_auto_asconf,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec,
 	},
 	{
-		.procname	= "prsctp_enable",
-		.data		= &sctp_prsctp_enable,
+		.procname	= "addip_enable",
+		.data		= &init_net.sctp.addip_enable,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec,
 	},
 	{
-		.procname	= "sack_timeout",
-		.data		= &sctp_sack_timeout,
+		.procname	= "addip_noauth_enable",
+		.data		= &init_net.sctp.addip_noauth,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1         = &sack_timer_min,
-		.extra2         = &sack_timer_max,
-	},
-	{
-		.procname	= "sctp_mem",
-		.data		= &sysctl_sctp_mem,
-		.maxlen		= sizeof(sysctl_sctp_mem),
-		.mode		= 0644,
-		.proc_handler	= proc_doulongvec_minmax
-	},
-	{
-		.procname	= "sctp_rmem",
-		.data		= &sysctl_sctp_rmem,
-		.maxlen		= sizeof(sysctl_sctp_rmem),
-		.mode		= 0644,
 		.proc_handler	= proc_dointvec,
 	},
 	{
-		.procname	= "sctp_wmem",
-		.data		= &sysctl_sctp_wmem,
-		.maxlen		= sizeof(sysctl_sctp_wmem),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
-	{
-		.procname	= "auth_enable",
-		.data		= &sctp_auth_enable,
+		.procname	= "prsctp_enable",
+		.data		= &init_net.sctp.prsctp_enable,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec,
 	},
 	{
-		.procname	= "addip_noauth_enable",
-		.data		= &sctp_addip_noauth,
+		.procname	= "auth_enable",
+		.data		= &init_net.sctp.auth_enable,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec,
 	},
 	{
 		.procname	= "addr_scope_policy",
-		.data		= &sctp_scope_policy,
+		.data		= &init_net.sctp.scope_policy,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec_minmax,
@@ -264,7 +269,7 @@ static ctl_table sctp_table[] = {
 	},
 	{
 		.procname	= "rwnd_update_shift",
-		.data		= &sctp_rwnd_upd_shift,
+		.data		= &init_net.sctp.rwnd_upd_shift,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec_minmax,
@@ -273,7 +278,7 @@ static ctl_table sctp_table[] = {
 	},
 	{
 		.procname	= "max_autoclose",
-		.data		= &sctp_max_autoclose,
+		.data		= &init_net.sctp.max_autoclose,
 		.maxlen		= sizeof(unsigned long),
 		.mode		= 0644,
 		.proc_handler	= &proc_doulongvec_minmax,
@@ -284,18 +289,18 @@ static ctl_table sctp_table[] = {
 	{ /* sentinel */ }
 };
 
-static ctl_table sctp_net_table[] = {
-	{ /* sentinel */ }
-};
-
 int sctp_sysctl_net_register(struct net *net)
 {
 	struct ctl_table *table;
+	int i;
 
 	table = kmemdup(sctp_net_table, sizeof(sctp_net_table), GFP_KERNEL);
 	if (!table)
 		return -ENOMEM;
 
+	for (i = 0; table[i].data; i++)
+		table[i].data += (char *)(&net->sctp) - (char *)&init_net.sctp;
+
 	net->sctp.sysctl_header = register_net_sysctl(net, "net/sctp", table);
 	return 0;
 }
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index aada963c9d6b..953c21e4af97 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -77,7 +77,7 @@ static struct sctp_transport *sctp_transport_init(struct net *net,
 	 * given destination transport address, set RTO to the protocol
 	 * parameter 'RTO.Initial'.
 	 */
-	peer->rto = msecs_to_jiffies(sctp_rto_initial);
+	peer->rto = msecs_to_jiffies(net->sctp.rto_initial);
 
 	peer->last_time_heard = jiffies;
 	peer->last_time_ecne_reduced = jiffies;
@@ -87,8 +87,8 @@ static struct sctp_transport *sctp_transport_init(struct net *net,
 			    SPP_SACKDELAY_ENABLE;
 
 	/* Initialize the default path max_retrans.  */
-	peer->pathmaxrxt  = sctp_max_retrans_path;
-	peer->pf_retrans  = sctp_pf_retrans;
+	peer->pathmaxrxt  = net->sctp.max_retrans_path;
+	peer->pf_retrans  = net->sctp.pf_retrans;
 
 	INIT_LIST_HEAD(&peer->transmitted);
 	INIT_LIST_HEAD(&peer->send_ready);
@@ -318,6 +318,7 @@ void sctp_transport_update_rto(struct sctp_transport *tp, __u32 rtt)
 	SCTP_ASSERT(tp->rto_pending, "rto_pending not set", return);
 
 	if (tp->rttvar || tp->srtt) {
+		struct net *net = sock_net(tp->asoc->base.sk);
 		/* 6.3.1 C3) When a new RTT measurement R' is made, set
 		 * RTTVAR <- (1 - RTO.Beta) * RTTVAR + RTO.Beta * |SRTT - R'|
 		 * SRTT <- (1 - RTO.Alpha) * SRTT + RTO.Alpha * R'
@@ -329,10 +330,10 @@ void sctp_transport_update_rto(struct sctp_transport *tp, __u32 rtt)
 		 * For example, assuming the default value of RTO.Alpha of
 		 * 1/8, rto_alpha would be expressed as 3.
 		 */
-		tp->rttvar = tp->rttvar - (tp->rttvar >> sctp_rto_beta)
-			+ ((abs(tp->srtt - rtt)) >> sctp_rto_beta);
-		tp->srtt = tp->srtt - (tp->srtt >> sctp_rto_alpha)
-			+ (rtt >> sctp_rto_alpha);
+		tp->rttvar = tp->rttvar - (tp->rttvar >> net->sctp.rto_beta)
+			+ ((abs(tp->srtt - rtt)) >> net->sctp.rto_beta);
+		tp->srtt = tp->srtt - (tp->srtt >> net->sctp.rto_alpha)
+			+ (rtt >> net->sctp.rto_alpha);
 	} else {
 		/* 6.3.1 C2) When the first RTT measurement R is made, set
 		 * SRTT <- R, RTTVAR <- R/2.
-- 
cgit v1.2.3


From 1f07b62f3205f6ed41759df2892eaf433bc051a1 Mon Sep 17 00:00:00 2001
From: Cong Wang <amwang@redhat.com>
Date: Wed, 15 Aug 2012 18:18:11 +0800
Subject: sctp: fix a compile error in sctp.h
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

I got the following compile error:

In file included from include/net/sctp/checksum.h:46:0,
                 from net/ipv4/netfilter/nf_nat_proto_sctp.c:14:
include/net/sctp/sctp.h: In function ‘sctp_dbg_objcnt_init’:
include/net/sctp/sctp.h:370:88: error: parameter name omitted
include/net/sctp/sctp.h: In function ‘sctp_dbg_objcnt_exit’:
include/net/sctp/sctp.h:371:88: error: parameter name omitted

which is caused by

	commit 13d782f6b4fbbaf9d0380a9947deb45a9de46ae7
	Author: Eric W. Biederman <ebiederm@xmission.com>
	Date:   Mon Aug 6 08:45:15 2012 +0000

	    sctp: Make the proc files per network namespace.

This patch could fix it.

Cc: David S. Miller <davem@davemloft.net>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: Cong Wang <amwang@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/sctp.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
index aaa82923bbaa..9c6414f553f9 100644
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@ -367,8 +367,8 @@ void sctp_dbg_objcnt_exit(struct net *);
 #define SCTP_DBG_OBJCNT_INC(name)
 #define SCTP_DBG_OBJCNT_DEC(name)
 
-static inline void sctp_dbg_objcnt_init(struct net *) { return; }
-static inline void sctp_dbg_objcnt_exit(struct net *) { return; }
+static inline void sctp_dbg_objcnt_init(struct net *net) { return; }
+static inline void sctp_dbg_objcnt_exit(struct net *net) { return; }
 
 #endif /* CONFIG_SCTP_DBG_OBJCOUNT */
 
-- 
cgit v1.2.3


From 65e0736bc2ac314bd374e93c24dd0698ac5ee66d Mon Sep 17 00:00:00 2001
From: Fan Du <fan.du@windriver.com>
Date: Wed, 15 Aug 2012 10:13:47 +0800
Subject: xfrm: remove redundant parameter "int dir" in struct xfrm_mgr.acquire

Sematically speaking, xfrm_mgr.acquire is called when kernel intends to ask
user space IKE daemon to negotiate SAs with peers. IOW the direction will
*always* be XFRM_POLICY_OUT, so remove int dir for clarity.

Signed-off-by: Fan Du <fan.du@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/xfrm.h    | 2 +-
 net/key/af_key.c      | 4 ++--
 net/xfrm/xfrm_state.c | 2 +-
 net/xfrm/xfrm_user.c  | 9 ++++-----
 4 files changed, 8 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 62b619e82a90..5e1662dbb83c 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -571,7 +571,7 @@ struct xfrm_mgr {
 	struct list_head	list;
 	char			*id;
 	int			(*notify)(struct xfrm_state *x, const struct km_event *c);
-	int			(*acquire)(struct xfrm_state *x, struct xfrm_tmpl *, struct xfrm_policy *xp, int dir);
+	int			(*acquire)(struct xfrm_state *x, struct xfrm_tmpl *, struct xfrm_policy *xp);
 	struct xfrm_policy	*(*compile_policy)(struct sock *sk, int opt, u8 *data, int len, int *dir);
 	int			(*new_mapping)(struct xfrm_state *x, xfrm_address_t *ipaddr, __be16 sport);
 	int			(*notify_policy)(struct xfrm_policy *x, int dir, const struct km_event *c);
diff --git a/net/key/af_key.c b/net/key/af_key.c
index 34e418508a67..ec7d161c129b 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -3024,7 +3024,7 @@ static u32 get_acqseq(void)
 	return res;
 }
 
-static int pfkey_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *xp, int dir)
+static int pfkey_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *xp)
 {
 	struct sk_buff *skb;
 	struct sadb_msg *hdr;
@@ -3105,7 +3105,7 @@ static int pfkey_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *t, struct
 	pol->sadb_x_policy_len = sizeof(struct sadb_x_policy)/sizeof(uint64_t);
 	pol->sadb_x_policy_exttype = SADB_X_EXT_POLICY;
 	pol->sadb_x_policy_type = IPSEC_POLICY_IPSEC;
-	pol->sadb_x_policy_dir = dir+1;
+	pol->sadb_x_policy_dir = XFRM_POLICY_OUT + 1;
 	pol->sadb_x_policy_id = xp->index;
 
 	/* Set sadb_comb's. */
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 87cd0e4d4282..7856c33898fa 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -1700,7 +1700,7 @@ int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol)
 
 	read_lock(&xfrm_km_lock);
 	list_for_each_entry(km, &xfrm_km_list, list) {
-		acqret = km->acquire(x, t, pol, XFRM_POLICY_OUT);
+		acqret = km->acquire(x, t, pol);
 		if (!acqret)
 			err = acqret;
 	}
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index e75d8e47f35c..ab58034c42d6 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -2567,8 +2567,7 @@ static inline size_t xfrm_acquire_msgsize(struct xfrm_state *x,
 }
 
 static int build_acquire(struct sk_buff *skb, struct xfrm_state *x,
-			 struct xfrm_tmpl *xt, struct xfrm_policy *xp,
-			 int dir)
+			 struct xfrm_tmpl *xt, struct xfrm_policy *xp)
 {
 	__u32 seq = xfrm_get_acqseq();
 	struct xfrm_user_acquire *ua;
@@ -2583,7 +2582,7 @@ static int build_acquire(struct sk_buff *skb, struct xfrm_state *x,
 	memcpy(&ua->id, &x->id, sizeof(ua->id));
 	memcpy(&ua->saddr, &x->props.saddr, sizeof(ua->saddr));
 	memcpy(&ua->sel, &x->sel, sizeof(ua->sel));
-	copy_to_user_policy(xp, &ua->policy, dir);
+	copy_to_user_policy(xp, &ua->policy, XFRM_POLICY_OUT);
 	ua->aalgos = xt->aalgos;
 	ua->ealgos = xt->ealgos;
 	ua->calgos = xt->calgos;
@@ -2605,7 +2604,7 @@ static int build_acquire(struct sk_buff *skb, struct xfrm_state *x,
 }
 
 static int xfrm_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *xt,
-			     struct xfrm_policy *xp, int dir)
+			     struct xfrm_policy *xp)
 {
 	struct net *net = xs_net(x);
 	struct sk_buff *skb;
@@ -2614,7 +2613,7 @@ static int xfrm_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *xt,
 	if (skb == NULL)
 		return -ENOMEM;
 
-	if (build_acquire(skb, x, xt, xp, dir) < 0)
+	if (build_acquire(skb, x, xt, xp) < 0)
 		BUG();
 
 	return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_ACQUIRE, GFP_ATOMIC);
-- 
cgit v1.2.3


From 16f01365fa01150bf3606fe702a80a03ec87953a Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@parallels.com>
Date: Thu, 16 Aug 2012 05:34:22 +0000
Subject: packet: Report rings cfg via diag engine

One extension bit may result in two nlattrs -- one per ring type.
If some ring type is not configured, then the respective nlatts
will be empty.

The structure reported contains the data, that is given to the
corresponding ring setup socket option.

Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/packet_diag.h | 13 ++++++++++++
 net/packet/diag.c           | 48 ++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 60 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/packet_diag.h b/include/linux/packet_diag.h
index ea2e8923bd7e..34ade828979b 100644
--- a/include/linux/packet_diag.h
+++ b/include/linux/packet_diag.h
@@ -14,6 +14,7 @@ struct packet_diag_req {
 
 #define PACKET_SHOW_INFO	0x00000001 /* Basic packet_sk information */
 #define PACKET_SHOW_MCLIST	0x00000002 /* A set of packet_diag_mclist-s */
+#define PACKET_SHOW_RING_CFG	0x00000004 /* Rings configuration parameters */
 
 struct packet_diag_msg {
 	__u8	pdiag_family;
@@ -27,6 +28,8 @@ struct packet_diag_msg {
 enum {
 	PACKET_DIAG_INFO,
 	PACKET_DIAG_MCLIST,
+	PACKET_DIAG_RX_RING,
+	PACKET_DIAG_TX_RING,
 
 	PACKET_DIAG_MAX,
 };
@@ -54,4 +57,14 @@ struct packet_diag_mclist {
 	__u8	pdmc_addr[MAX_ADDR_LEN];
 };
 
+struct packet_diag_ring {
+	__u32	pdr_block_size;
+	__u32	pdr_block_nr;
+	__u32	pdr_frame_size;
+	__u32	pdr_frame_nr;
+	__u32	pdr_retire_tmo;
+	__u32	pdr_sizeof_priv;
+	__u32	pdr_features;
+};
+
 #endif
diff --git a/net/packet/diag.c b/net/packet/diag.c
index 3dda4ecb9a46..e3975e4d458c 100644
--- a/net/packet/diag.c
+++ b/net/packet/diag.c
@@ -67,12 +67,54 @@ static int pdiag_put_mclist(const struct packet_sock *po, struct sk_buff *nlskb)
 	return 0;
 }
 
+static int pdiag_put_ring(struct packet_ring_buffer *ring, int ver, int nl_type,
+		struct sk_buff *nlskb)
+{
+	struct packet_diag_ring pdr;
+
+	if (!ring->pg_vec || ((ver > TPACKET_V2) &&
+				(nl_type == PACKET_DIAG_TX_RING)))
+		return 0;
+
+	pdr.pdr_block_size = ring->pg_vec_pages << PAGE_SHIFT;
+	pdr.pdr_block_nr = ring->pg_vec_len;
+	pdr.pdr_frame_size = ring->frame_size;
+	pdr.pdr_frame_nr = ring->frame_max + 1;
+
+	if (ver > TPACKET_V2) {
+		pdr.pdr_retire_tmo = ring->prb_bdqc.retire_blk_tov;
+		pdr.pdr_sizeof_priv = ring->prb_bdqc.blk_sizeof_priv;
+		pdr.pdr_features = ring->prb_bdqc.feature_req_word;
+	} else {
+		pdr.pdr_retire_tmo = 0;
+		pdr.pdr_sizeof_priv = 0;
+		pdr.pdr_features = 0;
+	}
+
+	return nla_put(nlskb, nl_type, sizeof(pdr), &pdr);
+}
+
+static int pdiag_put_rings_cfg(struct packet_sock *po, struct sk_buff *skb)
+{
+	int ret;
+
+	mutex_lock(&po->pg_vec_lock);
+	ret = pdiag_put_ring(&po->rx_ring, po->tp_version,
+			PACKET_DIAG_RX_RING, skb);
+	if (!ret)
+		ret = pdiag_put_ring(&po->tx_ring, po->tp_version,
+				PACKET_DIAG_TX_RING, skb);
+	mutex_unlock(&po->pg_vec_lock);
+
+	return ret;
+}
+
 static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct packet_diag_req *req,
 		u32 pid, u32 seq, u32 flags, int sk_ino)
 {
 	struct nlmsghdr *nlh;
 	struct packet_diag_msg *rp;
-	const struct packet_sock *po = pkt_sk(sk);
+	struct packet_sock *po = pkt_sk(sk);
 
 	nlh = nlmsg_put(skb, pid, seq, SOCK_DIAG_BY_FAMILY, sizeof(*rp), flags);
 	if (!nlh)
@@ -93,6 +135,10 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct packet_diag
 			pdiag_put_mclist(po, skb))
 		goto out_nlmsg_trim;
 
+	if ((req->pdiag_show & PACKET_SHOW_RING_CFG) &&
+			pdiag_put_rings_cfg(po, skb))
+		goto out_nlmsg_trim;
+
 	return nlmsg_end(skb, nlh);
 
 out_nlmsg_trim:
-- 
cgit v1.2.3


From fff3321d75b1a18231876a1aceb36eacbbf6221e Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@parallels.com>
Date: Thu, 16 Aug 2012 05:36:48 +0000
Subject: packet: Report fanout status via diag engine

Reported value is the same reported by the FANOUT getsockoption, but
unlike it, the absent fanout setup results in absent nlattr, rather
than in nlattr with zero value. This is done so, since zero fanout
report may mean both -- no fanout, and fanout with both id and type zero.

Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/packet_diag.h |  2 ++
 net/packet/af_packet.c      | 23 +++--------------------
 net/packet/diag.c           | 20 ++++++++++++++++++++
 net/packet/internal.h       | 20 +++++++++++++++++++-
 4 files changed, 44 insertions(+), 21 deletions(-)

(limited to 'include')

diff --git a/include/linux/packet_diag.h b/include/linux/packet_diag.h
index 34ade828979b..93f5fa94a431 100644
--- a/include/linux/packet_diag.h
+++ b/include/linux/packet_diag.h
@@ -15,6 +15,7 @@ struct packet_diag_req {
 #define PACKET_SHOW_INFO	0x00000001 /* Basic packet_sk information */
 #define PACKET_SHOW_MCLIST	0x00000002 /* A set of packet_diag_mclist-s */
 #define PACKET_SHOW_RING_CFG	0x00000004 /* Rings configuration parameters */
+#define PACKET_SHOW_FANOUT	0x00000008
 
 struct packet_diag_msg {
 	__u8	pdiag_family;
@@ -30,6 +31,7 @@ enum {
 	PACKET_DIAG_MCLIST,
 	PACKET_DIAG_RX_RING,
 	PACKET_DIAG_TX_RING,
+	PACKET_DIAG_FANOUT,
 
 	PACKET_DIAG_MAX,
 };
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 8a1605ae4029..226b2cdfc339 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -207,24 +207,6 @@ static void prb_fill_vlan_info(struct tpacket_kbdq_core *,
 		struct tpacket3_hdr *);
 static void packet_flush_mclist(struct sock *sk);
 
-#define PACKET_FANOUT_MAX	256
-
-struct packet_fanout {
-#ifdef CONFIG_NET_NS
-	struct net		*net;
-#endif
-	unsigned int		num_members;
-	u16			id;
-	u8			type;
-	u8			defrag;
-	atomic_t		rr_cur;
-	struct list_head	list;
-	struct sock		*arr[PACKET_FANOUT_MAX];
-	spinlock_t		lock;
-	atomic_t		sk_ref;
-	struct packet_type	prot_hook ____cacheline_aligned_in_smp;
-};
-
 struct packet_skb_cb {
 	unsigned int origlen;
 	union {
@@ -1148,7 +1130,8 @@ static int packet_rcv_fanout(struct sk_buff *skb, struct net_device *dev,
 	return po->prot_hook.func(skb, dev, &po->prot_hook, orig_dev);
 }
 
-static DEFINE_MUTEX(fanout_mutex);
+DEFINE_MUTEX(fanout_mutex);
+EXPORT_SYMBOL_GPL(fanout_mutex);
 static LIST_HEAD(fanout_list);
 
 static void __fanout_link(struct sock *sk, struct packet_sock *po)
@@ -1260,9 +1243,9 @@ static void fanout_release(struct sock *sk)
 	if (!f)
 		return;
 
+	mutex_lock(&fanout_mutex);
 	po->fanout = NULL;
 
-	mutex_lock(&fanout_mutex);
 	if (atomic_dec_and_test(&f->sk_ref)) {
 		list_del(&f->list);
 		dev_remove_pack(&f->prot_hook);
diff --git a/net/packet/diag.c b/net/packet/diag.c
index e3975e4d458c..bc33fbe8a5ef 100644
--- a/net/packet/diag.c
+++ b/net/packet/diag.c
@@ -109,6 +109,22 @@ static int pdiag_put_rings_cfg(struct packet_sock *po, struct sk_buff *skb)
 	return ret;
 }
 
+static int pdiag_put_fanout(struct packet_sock *po, struct sk_buff *nlskb)
+{
+	int ret = 0;
+
+	mutex_lock(&fanout_mutex);
+	if (po->fanout) {
+		u32 val;
+
+		val = (u32)po->fanout->id | ((u32)po->fanout->type << 16);
+		ret = nla_put_u32(nlskb, PACKET_DIAG_FANOUT, val);
+	}
+	mutex_unlock(&fanout_mutex);
+
+	return ret;
+}
+
 static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct packet_diag_req *req,
 		u32 pid, u32 seq, u32 flags, int sk_ino)
 {
@@ -139,6 +155,10 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct packet_diag
 			pdiag_put_rings_cfg(po, skb))
 		goto out_nlmsg_trim;
 
+	if ((req->pdiag_show & PACKET_SHOW_FANOUT) &&
+			pdiag_put_fanout(po, skb))
+		goto out_nlmsg_trim;
+
 	return nlmsg_end(skb, nlh);
 
 out_nlmsg_trim:
diff --git a/net/packet/internal.h b/net/packet/internal.h
index 2c5fca28b242..44945f6b7252 100644
--- a/net/packet/internal.h
+++ b/net/packet/internal.h
@@ -67,7 +67,25 @@ struct packet_ring_buffer {
 	atomic_t		pending;
 };
 
-struct packet_fanout;
+extern struct mutex fanout_mutex;
+#define PACKET_FANOUT_MAX	256
+
+struct packet_fanout {
+#ifdef CONFIG_NET_NS
+	struct net		*net;
+#endif
+	unsigned int		num_members;
+	u16			id;
+	u8			type;
+	u8			defrag;
+	atomic_t		rr_cur;
+	struct list_head	list;
+	struct sock		*arr[PACKET_FANOUT_MAX];
+	spinlock_t		lock;
+	atomic_t		sk_ref;
+	struct packet_type	prot_hook ____cacheline_aligned_in_smp;
+};
+
 struct packet_sock {
 	/* struct sock has to be the first member of packet_sock */
 	struct sock		sk;
-- 
cgit v1.2.3


From 34f256cc7962a44537a0d33877cd93c89873098e Mon Sep 17 00:00:00 2001
From: Ying Xue <ying.xue@windriver.com>
Date: Thu, 16 Aug 2012 12:09:13 +0000
Subject: tipc: eliminate configuration for maximum number of name
 subscriptions

Gets rid of the need for users to specify the maximum number of
name subscriptions supported by TIPC. TIPC now automatically provides
support for the maximum number of name subscriptions to 65535.

Signed-off-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/tipc_config.h |  4 ++--
 net/tipc/config.c           | 25 +++----------------------
 net/tipc/core.c             |  2 --
 net/tipc/core.h             |  4 ++--
 net/tipc/subscr.c           |  4 ++--
 5 files changed, 9 insertions(+), 30 deletions(-)

(limited to 'include')

diff --git a/include/linux/tipc_config.h b/include/linux/tipc_config.h
index c98928420100..8dd8305c26c5 100644
--- a/include/linux/tipc_config.h
+++ b/include/linux/tipc_config.h
@@ -90,7 +90,7 @@
 #define  TIPC_CMD_GET_REMOTE_MNG    0x4003    /* tx none, rx unsigned */
 #define  TIPC_CMD_GET_MAX_PORTS     0x4004    /* tx none, rx unsigned */
 #define  TIPC_CMD_GET_MAX_PUBL      0x4005    /* tx none, rx unsigned */
-#define  TIPC_CMD_GET_MAX_SUBSCR    0x4006    /* tx none, rx unsigned */
+#define  TIPC_CMD_GET_MAX_SUBSCR    0x4006    /* obsoleted */
 #define  TIPC_CMD_GET_MAX_ZONES     0x4007    /* obsoleted */
 #define  TIPC_CMD_GET_MAX_CLUSTERS  0x4008    /* obsoleted */
 #define  TIPC_CMD_GET_MAX_NODES     0x4009    /* obsoleted */
@@ -116,7 +116,7 @@
 #define  TIPC_CMD_SET_REMOTE_MNG    0x8003    /* tx unsigned, rx none */
 #define  TIPC_CMD_SET_MAX_PORTS     0x8004    /* tx unsigned, rx none */
 #define  TIPC_CMD_SET_MAX_PUBL      0x8005    /* tx unsigned, rx none */
-#define  TIPC_CMD_SET_MAX_SUBSCR    0x8006    /* tx unsigned, rx none */
+#define  TIPC_CMD_SET_MAX_SUBSCR    0x8006    /* obsoleted */
 #define  TIPC_CMD_SET_MAX_ZONES     0x8007    /* obsoleted */
 #define  TIPC_CMD_SET_MAX_CLUSTERS  0x8008    /* obsoleted */
 #define  TIPC_CMD_SET_MAX_NODES     0x8009    /* obsoleted */
diff --git a/net/tipc/config.c b/net/tipc/config.c
index a056a3852f71..7a1275863c8a 100644
--- a/net/tipc/config.c
+++ b/net/tipc/config.c
@@ -2,7 +2,7 @@
  * net/tipc/config.c: TIPC configuration management code
  *
  * Copyright (c) 2002-2006, Ericsson AB
- * Copyright (c) 2004-2007, 2010-2011, Wind River Systems
+ * Copyright (c) 2004-2007, 2010-2012, Wind River Systems
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -223,21 +223,6 @@ static struct sk_buff *cfg_set_max_publications(void)
 	return tipc_cfg_reply_none();
 }
 
-static struct sk_buff *cfg_set_max_subscriptions(void)
-{
-	u32 value;
-
-	if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_UNSIGNED))
-		return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR);
-
-	value = ntohl(*(__be32 *)TLV_DATA(req_tlv_area));
-	if (value < 1 || value > 65535)
-		return tipc_cfg_reply_error_string(TIPC_CFG_INVALID_VALUE
-						   " (max subscriptions must be 1-65535");
-	tipc_max_subscriptions = value;
-	return tipc_cfg_reply_none();
-}
-
 static struct sk_buff *cfg_set_max_ports(void)
 {
 	u32 value;
@@ -360,9 +345,6 @@ struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, const void *request_area
 	case TIPC_CMD_SET_MAX_PUBL:
 		rep_tlv_buf = cfg_set_max_publications();
 		break;
-	case TIPC_CMD_SET_MAX_SUBSCR:
-		rep_tlv_buf = cfg_set_max_subscriptions();
-		break;
 	case TIPC_CMD_SET_NETID:
 		rep_tlv_buf = cfg_set_netid();
 		break;
@@ -375,9 +357,6 @@ struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, const void *request_area
 	case TIPC_CMD_GET_MAX_PUBL:
 		rep_tlv_buf = tipc_cfg_reply_unsigned(tipc_max_publications);
 		break;
-	case TIPC_CMD_GET_MAX_SUBSCR:
-		rep_tlv_buf = tipc_cfg_reply_unsigned(tipc_max_subscriptions);
-		break;
 	case TIPC_CMD_GET_NETID:
 		rep_tlv_buf = tipc_cfg_reply_unsigned(tipc_net_id);
 		break;
@@ -393,6 +372,8 @@ struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, const void *request_area
 	case TIPC_CMD_GET_MAX_CLUSTERS:
 	case TIPC_CMD_SET_MAX_NODES:
 	case TIPC_CMD_GET_MAX_NODES:
+	case TIPC_CMD_SET_MAX_SUBSCR:
+	case TIPC_CMD_GET_MAX_SUBSCR:
 	case TIPC_CMD_SET_LOG_SIZE:
 	case TIPC_CMD_DUMP_LOG:
 		rep_tlv_buf = tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED
diff --git a/net/tipc/core.c b/net/tipc/core.c
index b858f2003523..73e5eac20735 100644
--- a/net/tipc/core.c
+++ b/net/tipc/core.c
@@ -53,7 +53,6 @@ int tipc_random __read_mostly;
 /* configurable TIPC parameters */
 u32 tipc_own_addr __read_mostly;
 int tipc_max_ports __read_mostly;
-int tipc_max_subscriptions;
 int tipc_max_publications;
 int tipc_net_id __read_mostly;
 int tipc_remote_management __read_mostly;
@@ -157,7 +156,6 @@ static int __init tipc_init(void)
 	tipc_own_addr = 0;
 	tipc_remote_management = 1;
 	tipc_max_publications = 10000;
-	tipc_max_subscriptions = 2000;
 	tipc_max_ports = CONFIG_TIPC_PORTS;
 	tipc_net_id = 4711;
 
diff --git a/net/tipc/core.h b/net/tipc/core.h
index 4c5705ac8a5a..ef01412b0989 100644
--- a/net/tipc/core.h
+++ b/net/tipc/core.h
@@ -60,7 +60,8 @@
 
 #define TIPC_MOD_VER "2.0.0"
 
-#define ULTRA_STRING_MAX_LEN 32768
+#define ULTRA_STRING_MAX_LEN	32768
+#define TIPC_MAX_SUBSCRIPTIONS	65535
 
 struct tipc_msg;	/* msg.h */
 
@@ -76,7 +77,6 @@ int tipc_snprintf(char *buf, int len, const char *fmt, ...);
  */
 extern u32 tipc_own_addr __read_mostly;
 extern int tipc_max_ports __read_mostly;
-extern int tipc_max_subscriptions;
 extern int tipc_max_publications;
 extern int tipc_net_id __read_mostly;
 extern int tipc_remote_management __read_mostly;
diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c
index 5ed5965eb0be..0f7d0d007e22 100644
--- a/net/tipc/subscr.c
+++ b/net/tipc/subscr.c
@@ -304,9 +304,9 @@ static struct tipc_subscription *subscr_subscribe(struct tipc_subscr *s,
 	}
 
 	/* Refuse subscription if global limit exceeded */
-	if (atomic_read(&topsrv.subscription_count) >= tipc_max_subscriptions) {
+	if (atomic_read(&topsrv.subscription_count) >= TIPC_MAX_SUBSCRIPTIONS) {
 		pr_warn("Subscription rejected, limit reached (%u)\n",
-			tipc_max_subscriptions);
+			TIPC_MAX_SUBSCRIPTIONS);
 		subscr_terminate(subscriber);
 		return NULL;
 	}
-- 
cgit v1.2.3


From e6a04b1d3ff9d5af219b2fcaebe0ef04733d597c Mon Sep 17 00:00:00 2001
From: Ying Xue <ying.xue@windriver.com>
Date: Thu, 16 Aug 2012 12:09:14 +0000
Subject: tipc: eliminate configuration for maximum number of name publications

Gets rid of the need for users to specify the maximum number of
name publications supported by TIPC. TIPC now automatically provides
support for the maximum number of name publications to 65535.

Signed-off-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/tipc_config.h |  4 ++--
 net/tipc/config.c           | 23 ++---------------------
 net/tipc/core.c             |  2 --
 net/tipc/core.h             |  2 +-
 net/tipc/name_table.c       |  4 ++--
 5 files changed, 7 insertions(+), 28 deletions(-)

(limited to 'include')

diff --git a/include/linux/tipc_config.h b/include/linux/tipc_config.h
index 8dd8305c26c5..0b1e3f218a36 100644
--- a/include/linux/tipc_config.h
+++ b/include/linux/tipc_config.h
@@ -89,7 +89,7 @@
 
 #define  TIPC_CMD_GET_REMOTE_MNG    0x4003    /* tx none, rx unsigned */
 #define  TIPC_CMD_GET_MAX_PORTS     0x4004    /* tx none, rx unsigned */
-#define  TIPC_CMD_GET_MAX_PUBL      0x4005    /* tx none, rx unsigned */
+#define  TIPC_CMD_GET_MAX_PUBL      0x4005    /* obsoleted */
 #define  TIPC_CMD_GET_MAX_SUBSCR    0x4006    /* obsoleted */
 #define  TIPC_CMD_GET_MAX_ZONES     0x4007    /* obsoleted */
 #define  TIPC_CMD_GET_MAX_CLUSTERS  0x4008    /* obsoleted */
@@ -115,7 +115,7 @@
 #define  TIPC_CMD_SET_NODE_ADDR     0x8001    /* tx net_addr, rx none */
 #define  TIPC_CMD_SET_REMOTE_MNG    0x8003    /* tx unsigned, rx none */
 #define  TIPC_CMD_SET_MAX_PORTS     0x8004    /* tx unsigned, rx none */
-#define  TIPC_CMD_SET_MAX_PUBL      0x8005    /* tx unsigned, rx none */
+#define  TIPC_CMD_SET_MAX_PUBL      0x8005    /* obsoleted */
 #define  TIPC_CMD_SET_MAX_SUBSCR    0x8006    /* obsoleted */
 #define  TIPC_CMD_SET_MAX_ZONES     0x8007    /* obsoleted */
 #define  TIPC_CMD_SET_MAX_CLUSTERS  0x8008    /* obsoleted */
diff --git a/net/tipc/config.c b/net/tipc/config.c
index 7a1275863c8a..f67866c765dd 100644
--- a/net/tipc/config.c
+++ b/net/tipc/config.c
@@ -208,21 +208,6 @@ static struct sk_buff *cfg_set_remote_mng(void)
 	return tipc_cfg_reply_none();
 }
 
-static struct sk_buff *cfg_set_max_publications(void)
-{
-	u32 value;
-
-	if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_UNSIGNED))
-		return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR);
-
-	value = ntohl(*(__be32 *)TLV_DATA(req_tlv_area));
-	if (value < 1 || value > 65535)
-		return tipc_cfg_reply_error_string(TIPC_CFG_INVALID_VALUE
-						   " (max publications must be 1-65535)");
-	tipc_max_publications = value;
-	return tipc_cfg_reply_none();
-}
-
 static struct sk_buff *cfg_set_max_ports(void)
 {
 	u32 value;
@@ -342,9 +327,6 @@ struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, const void *request_area
 	case TIPC_CMD_SET_MAX_PORTS:
 		rep_tlv_buf = cfg_set_max_ports();
 		break;
-	case TIPC_CMD_SET_MAX_PUBL:
-		rep_tlv_buf = cfg_set_max_publications();
-		break;
 	case TIPC_CMD_SET_NETID:
 		rep_tlv_buf = cfg_set_netid();
 		break;
@@ -354,9 +336,6 @@ struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, const void *request_area
 	case TIPC_CMD_GET_MAX_PORTS:
 		rep_tlv_buf = tipc_cfg_reply_unsigned(tipc_max_ports);
 		break;
-	case TIPC_CMD_GET_MAX_PUBL:
-		rep_tlv_buf = tipc_cfg_reply_unsigned(tipc_max_publications);
-		break;
 	case TIPC_CMD_GET_NETID:
 		rep_tlv_buf = tipc_cfg_reply_unsigned(tipc_net_id);
 		break;
@@ -374,6 +353,8 @@ struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, const void *request_area
 	case TIPC_CMD_GET_MAX_NODES:
 	case TIPC_CMD_SET_MAX_SUBSCR:
 	case TIPC_CMD_GET_MAX_SUBSCR:
+	case TIPC_CMD_SET_MAX_PUBL:
+	case TIPC_CMD_GET_MAX_PUBL:
 	case TIPC_CMD_SET_LOG_SIZE:
 	case TIPC_CMD_DUMP_LOG:
 		rep_tlv_buf = tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED
diff --git a/net/tipc/core.c b/net/tipc/core.c
index 73e5eac20735..bfe8af88469a 100644
--- a/net/tipc/core.c
+++ b/net/tipc/core.c
@@ -53,7 +53,6 @@ int tipc_random __read_mostly;
 /* configurable TIPC parameters */
 u32 tipc_own_addr __read_mostly;
 int tipc_max_ports __read_mostly;
-int tipc_max_publications;
 int tipc_net_id __read_mostly;
 int tipc_remote_management __read_mostly;
 
@@ -155,7 +154,6 @@ static int __init tipc_init(void)
 
 	tipc_own_addr = 0;
 	tipc_remote_management = 1;
-	tipc_max_publications = 10000;
 	tipc_max_ports = CONFIG_TIPC_PORTS;
 	tipc_net_id = 4711;
 
diff --git a/net/tipc/core.h b/net/tipc/core.h
index ef01412b0989..0207db04179a 100644
--- a/net/tipc/core.h
+++ b/net/tipc/core.h
@@ -62,6 +62,7 @@
 
 #define ULTRA_STRING_MAX_LEN	32768
 #define TIPC_MAX_SUBSCRIPTIONS	65535
+#define TIPC_MAX_PUBLICATIONS	65535
 
 struct tipc_msg;	/* msg.h */
 
@@ -77,7 +78,6 @@ int tipc_snprintf(char *buf, int len, const char *fmt, ...);
  */
 extern u32 tipc_own_addr __read_mostly;
 extern int tipc_max_ports __read_mostly;
-extern int tipc_max_publications;
 extern int tipc_net_id __read_mostly;
 extern int tipc_remote_management __read_mostly;
 
diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
index 4ebdcc96cb04..98975e80bb51 100644
--- a/net/tipc/name_table.c
+++ b/net/tipc/name_table.c
@@ -667,9 +667,9 @@ struct publication *tipc_nametbl_publish(u32 type, u32 lower, u32 upper,
 {
 	struct publication *publ;
 
-	if (table.local_publ_count >= tipc_max_publications) {
+	if (table.local_publ_count >= TIPC_MAX_PUBLICATIONS) {
 		pr_warn("Publication failed, local publication limit reached (%u)\n",
-			tipc_max_publications);
+			TIPC_MAX_PUBLICATIONS);
 		return NULL;
 	}
 
-- 
cgit v1.2.3


From 1d76efe1577b4323609b1bcbfafa8b731eda071a Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@resnulli.us>
Date: Fri, 17 Aug 2012 04:00:48 +0000
Subject: team: add support for non-ethernet devices

This is resolved by two things:
1) allow dev_addr of different length than ETH_ALEN
2) during port add, check for dev->type and change it if necessary

Signed-off-by: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/team/Kconfig                |  4 +-
 drivers/net/team/team.c                 | 89 +++++++++++++++++++++++++--------
 drivers/net/team/team_mode_broadcast.c  |  8 +--
 drivers/net/team/team_mode_roundrobin.c |  8 +--
 include/linux/if_team.h                 |  4 +-
 5 files changed, 79 insertions(+), 34 deletions(-)

(limited to 'include')

diff --git a/drivers/net/team/Kconfig b/drivers/net/team/Kconfig
index 6a7260b03a1e..6b08bd419fba 100644
--- a/drivers/net/team/Kconfig
+++ b/drivers/net/team/Kconfig
@@ -21,7 +21,7 @@ config NET_TEAM_MODE_BROADCAST
 	---help---
 	  Basic mode where packets are transmitted always by all suitable ports.
 
-	  All added ports are setup to have team's mac address.
+	  All added ports are setup to have team's device address.
 
 	  To compile this team mode as a module, choose M here: the module
 	  will be called team_mode_broadcast.
@@ -33,7 +33,7 @@ config NET_TEAM_MODE_ROUNDROBIN
 	  Basic mode where port used for transmitting packets is selected in
 	  round-robin fashion using packet counter.
 
-	  All added ports are setup to have team's mac address.
+	  All added ports are setup to have team's device address.
 
 	  To compile this team mode as a module, choose M here: the module
 	  will be called team_mode_roundrobin.
diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c
index ba10c469b02b..bc36d13c2675 100644
--- a/drivers/net/team/team.c
+++ b/drivers/net/team/team.c
@@ -54,29 +54,29 @@ static struct team_port *team_port_get_rtnl(const struct net_device *dev)
 }
 
 /*
- * Since the ability to change mac address for open port device is tested in
+ * Since the ability to change device address for open port device is tested in
  * team_port_add, this function can be called without control of return value
  */
-static int __set_port_mac(struct net_device *port_dev,
-			  const unsigned char *dev_addr)
+static int __set_port_dev_addr(struct net_device *port_dev,
+			       const unsigned char *dev_addr)
 {
 	struct sockaddr addr;
 
-	memcpy(addr.sa_data, dev_addr, ETH_ALEN);
-	addr.sa_family = ARPHRD_ETHER;
+	memcpy(addr.sa_data, dev_addr, port_dev->addr_len);
+	addr.sa_family = port_dev->type;
 	return dev_set_mac_address(port_dev, &addr);
 }
 
-static int team_port_set_orig_mac(struct team_port *port)
+static int team_port_set_orig_dev_addr(struct team_port *port)
 {
-	return __set_port_mac(port->dev, port->orig.dev_addr);
+	return __set_port_dev_addr(port->dev, port->orig.dev_addr);
 }
 
-int team_port_set_team_mac(struct team_port *port)
+int team_port_set_team_dev_addr(struct team_port *port)
 {
-	return __set_port_mac(port->dev, port->team->dev->dev_addr);
+	return __set_port_dev_addr(port->dev, port->team->dev->dev_addr);
 }
-EXPORT_SYMBOL(team_port_set_team_mac);
+EXPORT_SYMBOL(team_port_set_team_dev_addr);
 
 static void team_refresh_port_linkup(struct team_port *port)
 {
@@ -965,6 +965,8 @@ static struct netpoll_info *team_netpoll_info(struct team *team)
 #endif
 
 static void __team_port_change_check(struct team_port *port, bool linkup);
+static int team_dev_type_check_change(struct net_device *dev,
+				      struct net_device *port_dev);
 
 static int team_port_add(struct team *team, struct net_device *port_dev)
 {
@@ -973,9 +975,8 @@ static int team_port_add(struct team *team, struct net_device *port_dev)
 	char *portname = port_dev->name;
 	int err;
 
-	if (port_dev->flags & IFF_LOOPBACK ||
-	    port_dev->type != ARPHRD_ETHER) {
-		netdev_err(dev, "Device %s is of an unsupported type\n",
+	if (port_dev->flags & IFF_LOOPBACK) {
+		netdev_err(dev, "Device %s is loopback device. Loopback devices can't be added as a team port\n",
 			   portname);
 		return -EINVAL;
 	}
@@ -986,6 +987,10 @@ static int team_port_add(struct team *team, struct net_device *port_dev)
 		return -EBUSY;
 	}
 
+	err = team_dev_type_check_change(dev, port_dev);
+	if (err)
+		return err;
+
 	if (port_dev->flags & IFF_UP) {
 		netdev_err(dev, "Device %s is up. Set it down before adding it as a team port\n",
 			   portname);
@@ -1008,7 +1013,7 @@ static int team_port_add(struct team *team, struct net_device *port_dev)
 		goto err_set_mtu;
 	}
 
-	memcpy(port->orig.dev_addr, port_dev->dev_addr, ETH_ALEN);
+	memcpy(port->orig.dev_addr, port_dev->dev_addr, port_dev->addr_len);
 
 	err = team_port_enter(team, port);
 	if (err) {
@@ -1089,7 +1094,7 @@ err_vids_add:
 
 err_dev_open:
 	team_port_leave(team, port);
-	team_port_set_orig_mac(port);
+	team_port_set_orig_dev_addr(port);
 
 err_port_enter:
 	dev_set_mtu(port_dev, port->orig.mtu);
@@ -1126,7 +1131,7 @@ static int team_port_del(struct team *team, struct net_device *port_dev)
 	vlan_vids_del_by_dev(port_dev, dev);
 	dev_close(port_dev);
 	team_port_leave(team, port);
-	team_port_set_orig_mac(port);
+	team_port_set_orig_dev_addr(port);
 	dev_set_mtu(port_dev, port->orig.mtu);
 	synchronize_rcu();
 	kfree(port);
@@ -1477,17 +1482,18 @@ static void team_set_rx_mode(struct net_device *dev)
 
 static int team_set_mac_address(struct net_device *dev, void *p)
 {
+	struct sockaddr *addr = p;
 	struct team *team = netdev_priv(dev);
 	struct team_port *port;
-	int err;
 
-	err = eth_mac_addr(dev, p);
-	if (err)
-		return err;
+	if (dev->type == ARPHRD_ETHER && !is_valid_ether_addr(addr->sa_data))
+		return -EADDRNOTAVAIL;
+	memcpy(dev->dev_addr, addr->sa_data, dev->addr_len);
+	dev->addr_assign_type &= ~NET_ADDR_RANDOM;
 	rcu_read_lock();
 	list_for_each_entry_rcu(port, &team->port_list, list)
-		if (team->ops.port_change_mac)
-			team->ops.port_change_mac(team, port);
+		if (team->ops.port_change_dev_addr)
+			team->ops.port_change_dev_addr(team, port);
 	rcu_read_unlock();
 	return 0;
 }
@@ -1718,6 +1724,45 @@ static const struct net_device_ops team_netdev_ops = {
  * rt netlink interface
  ***********************/
 
+static void team_setup_by_port(struct net_device *dev,
+			       struct net_device *port_dev)
+{
+	dev->header_ops	= port_dev->header_ops;
+	dev->type = port_dev->type;
+	dev->hard_header_len = port_dev->hard_header_len;
+	dev->addr_len = port_dev->addr_len;
+	dev->mtu = port_dev->mtu;
+	memcpy(dev->broadcast, port_dev->broadcast, port_dev->addr_len);
+	memcpy(dev->dev_addr, port_dev->dev_addr, port_dev->addr_len);
+	dev->addr_assign_type &= ~NET_ADDR_RANDOM;
+}
+
+static int team_dev_type_check_change(struct net_device *dev,
+				      struct net_device *port_dev)
+{
+	struct team *team = netdev_priv(dev);
+	char *portname = port_dev->name;
+	int err;
+
+	if (dev->type == port_dev->type)
+		return 0;
+	if (!list_empty(&team->port_list)) {
+		netdev_err(dev, "Device %s is of different type\n", portname);
+		return -EBUSY;
+	}
+	err = call_netdevice_notifiers(NETDEV_PRE_TYPE_CHANGE, dev);
+	err = notifier_to_errno(err);
+	if (err) {
+		netdev_err(dev, "Refused to change device type\n");
+		return err;
+	}
+	dev_uc_flush(dev);
+	dev_mc_flush(dev);
+	team_setup_by_port(dev, port_dev);
+	call_netdevice_notifiers(NETDEV_POST_TYPE_CHANGE, dev);
+	return 0;
+}
+
 static void team_setup(struct net_device *dev)
 {
 	ether_setup(dev);
diff --git a/drivers/net/team/team_mode_broadcast.c b/drivers/net/team/team_mode_broadcast.c
index c96e4d2967f0..9db0171e9366 100644
--- a/drivers/net/team/team_mode_broadcast.c
+++ b/drivers/net/team/team_mode_broadcast.c
@@ -48,18 +48,18 @@ static bool bc_transmit(struct team *team, struct sk_buff *skb)
 
 static int bc_port_enter(struct team *team, struct team_port *port)
 {
-	return team_port_set_team_mac(port);
+	return team_port_set_team_dev_addr(port);
 }
 
-static void bc_port_change_mac(struct team *team, struct team_port *port)
+static void bc_port_change_dev_addr(struct team *team, struct team_port *port)
 {
-	team_port_set_team_mac(port);
+	team_port_set_team_dev_addr(port);
 }
 
 static const struct team_mode_ops bc_mode_ops = {
 	.transmit		= bc_transmit,
 	.port_enter		= bc_port_enter,
-	.port_change_mac	= bc_port_change_mac,
+	.port_change_dev_addr	= bc_port_change_dev_addr,
 };
 
 static const struct team_mode bc_mode = {
diff --git a/drivers/net/team/team_mode_roundrobin.c b/drivers/net/team/team_mode_roundrobin.c
index ad7ed0ec544c..105135aa8f05 100644
--- a/drivers/net/team/team_mode_roundrobin.c
+++ b/drivers/net/team/team_mode_roundrobin.c
@@ -66,18 +66,18 @@ drop:
 
 static int rr_port_enter(struct team *team, struct team_port *port)
 {
-	return team_port_set_team_mac(port);
+	return team_port_set_team_dev_addr(port);
 }
 
-static void rr_port_change_mac(struct team *team, struct team_port *port)
+static void rr_port_change_dev_addr(struct team *team, struct team_port *port)
 {
-	team_port_set_team_mac(port);
+	team_port_set_team_dev_addr(port);
 }
 
 static const struct team_mode_ops rr_mode_ops = {
 	.transmit		= rr_transmit,
 	.port_enter		= rr_port_enter,
-	.port_change_mac	= rr_port_change_mac,
+	.port_change_dev_addr	= rr_port_change_dev_addr,
 };
 
 static const struct team_mode rr_mode = {
diff --git a/include/linux/if_team.h b/include/linux/if_team.h
index 33fcc20b5881..8b000b295730 100644
--- a/include/linux/if_team.h
+++ b/include/linux/if_team.h
@@ -123,7 +123,7 @@ struct team_mode_ops {
 	bool (*transmit)(struct team *team, struct sk_buff *skb);
 	int (*port_enter)(struct team *team, struct team_port *port);
 	void (*port_leave)(struct team *team, struct team_port *port);
-	void (*port_change_mac)(struct team *team, struct team_port *port);
+	void (*port_change_dev_addr)(struct team *team, struct team_port *port);
 	void (*port_enabled)(struct team *team, struct team_port *port);
 	void (*port_disabled)(struct team *team, struct team_port *port);
 };
@@ -238,7 +238,7 @@ static inline struct team_port *team_get_port_by_index_rcu(struct team *team,
 	return NULL;
 }
 
-extern int team_port_set_team_mac(struct team_port *port);
+extern int team_port_set_team_dev_addr(struct team_port *port);
 extern int team_options_register(struct team *team,
 				 const struct team_option *option,
 				 size_t option_count);
-- 
cgit v1.2.3


From e687f61eedab8895e5669cb82cebe0253631cd8c Mon Sep 17 00:00:00 2001
From: Antonio Quartulli <ordex@autistici.org>
Date: Sun, 12 Aug 2012 18:24:55 +0200
Subject: mac80211: add supported rates change notification in IBSS

In IBSS it is possible that the supported rates set for a station changes over
time (e.g. it gets first initialised as an empty set because of no available
information about rates and updated later). In this case the driver has to be
notified about the change in order to update its internal table accordingly (if
needed).

This behaviour is needed by all those drivers that handle rc internally but
leave stations management to mac80211

Reported-by: Gui Iribarren <gui@altermundi.net>
Signed-off-by: Antonio Quartulli <ordex@autistici.org>
[Johannes - add docs, validate IBSS mode only, fix compilation]
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/mac80211.h    | 4 ++++
 net/mac80211/driver-ops.h | 3 +++
 net/mac80211/ibss.c       | 5 ++++-
 3 files changed, 11 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 8114f590f715..d9cef31f4cbf 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -1897,10 +1897,14 @@ enum ieee80211_frame_release_type {
  * @IEEE80211_RC_BW_CHANGED: The bandwidth that can be used to transmit
  *	to this station changed.
  * @IEEE80211_RC_SMPS_CHANGED: The SMPS state of the station changed.
+ * @IEEE80211_RC_SUPP_RATES_CHANGED: The supported rate set of this peer
+ *	changed (in IBSS mode) due to discovering more information about
+ *	the peer.
  */
 enum ieee80211_rate_control_changed {
 	IEEE80211_RC_BW_CHANGED		= BIT(0),
 	IEEE80211_RC_SMPS_CHANGED	= BIT(1),
+	IEEE80211_RC_SUPP_RATES_CHANGED	= BIT(2),
 };
 
 /**
diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h
index a81117a83996..a81154d27291 100644
--- a/net/mac80211/driver-ops.h
+++ b/net/mac80211/driver-ops.h
@@ -528,6 +528,9 @@ static inline void drv_sta_rc_update(struct ieee80211_local *local,
 	sdata = get_bss_sdata(sdata);
 	check_sdata_in_driver(sdata);
 
+	WARN_ON(changed & IEEE80211_RC_SUPP_RATES_CHANGED &&
+		sdata->vif.type != NL80211_IFTYPE_ADHOC);
+
 	trace_drv_sta_rc_update(local, sdata, sta, changed);
 	if (local->ops->sta_rc_update)
 		local->ops->sta_rc_update(&local->hw, &sdata->vif,
diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c
index 1ebda2f9e57b..a9d93285dba7 100644
--- a/net/mac80211/ibss.c
+++ b/net/mac80211/ibss.c
@@ -459,8 +459,11 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,
 			}
 		}
 
-		if (sta && rates_updated)
+		if (sta && rates_updated) {
+			drv_sta_rc_update(local, sdata, &sta->sta,
+					  IEEE80211_RC_SUPP_RATES_CHANGED);
 			rate_control_rate_init(sta);
+		}
 
 		rcu_read_unlock();
 	}
-- 
cgit v1.2.3


From 48613ece3d6a2613caa376f51477435cc080f3cd Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Thu, 5 Jul 2012 11:32:16 +0200
Subject: wireless: add radiotap A-MPDU status field

Define the A-MPDU status field in radiotap, also
update the radiotap parser for it and the MCS field
that was apparently missed last time.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/ieee80211_radiotap.h | 11 +++++++++++
 net/wireless/radiotap.c          |  2 ++
 2 files changed, 13 insertions(+)

(limited to 'include')

diff --git a/include/net/ieee80211_radiotap.h b/include/net/ieee80211_radiotap.h
index 71392545d0a1..7f0df133d119 100644
--- a/include/net/ieee80211_radiotap.h
+++ b/include/net/ieee80211_radiotap.h
@@ -183,6 +183,9 @@ struct ieee80211_radiotap_header {
  *     Contains a bitmap of known fields/flags, the flags, and
  *     the MCS index.
  *
+ * IEEE80211_RADIOTAP_AMPDU_STATUS	u32, u16, u8, u8	unitless
+ *
+ *	Contains the AMPDU information for the subframe.
  */
 enum ieee80211_radiotap_type {
 	IEEE80211_RADIOTAP_TSFT = 0,
@@ -205,6 +208,7 @@ enum ieee80211_radiotap_type {
 	IEEE80211_RADIOTAP_DATA_RETRIES = 17,
 
 	IEEE80211_RADIOTAP_MCS = 19,
+	IEEE80211_RADIOTAP_AMPDU_STATUS = 20,
 
 	/* valid in every it_present bitmap, even vendor namespaces */
 	IEEE80211_RADIOTAP_RADIOTAP_NAMESPACE = 29,
@@ -270,6 +274,13 @@ enum ieee80211_radiotap_type {
 #define IEEE80211_RADIOTAP_MCS_FMT_GF		0x08
 #define IEEE80211_RADIOTAP_MCS_FEC_LDPC		0x10
 
+/* For IEEE80211_RADIOTAP_AMPDU_STATUS */
+#define IEEE80211_RADIOTAP_AMPDU_REPORT_ZEROLEN		0x0001
+#define IEEE80211_RADIOTAP_AMPDU_IS_ZEROLEN		0x0002
+#define IEEE80211_RADIOTAP_AMPDU_LAST_KNOWN		0x0004
+#define IEEE80211_RADIOTAP_AMPDU_IS_LAST		0x0008
+#define IEEE80211_RADIOTAP_AMPDU_DELIM_CRC_ERR		0x0010
+#define IEEE80211_RADIOTAP_AMPDU_DELIM_CRC_KNOWN	0x0020
 
 /* helpers */
 static inline int ieee80211_get_radiotap_len(unsigned char *data)
diff --git a/net/wireless/radiotap.c b/net/wireless/radiotap.c
index c4ad7958af52..7d604c06c3dc 100644
--- a/net/wireless/radiotap.c
+++ b/net/wireless/radiotap.c
@@ -41,6 +41,8 @@ static const struct radiotap_align_size rtap_namespace_sizes[] = {
 	[IEEE80211_RADIOTAP_TX_FLAGS] = { .align = 2, .size = 2, },
 	[IEEE80211_RADIOTAP_RTS_RETRIES] = { .align = 1, .size = 1, },
 	[IEEE80211_RADIOTAP_DATA_RETRIES] = { .align = 1, .size = 1, },
+	[IEEE80211_RADIOTAP_MCS] = { .align = 1, .size = 3, },
+	[IEEE80211_RADIOTAP_AMPDU_STATUS] = { .align = 4, .size = 8, },
 	/*
 	 * add more here as they are defined in radiotap.h
 	 */
-- 
cgit v1.2.3


From 4c29867790131c281ef96af507d85e3e5f829408 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Thu, 5 Jul 2012 11:34:31 +0200
Subject: mac80211: support A-MPDU status reporting

Support getting A-MPDU status information from the
drivers and reporting it to userspace via radiotap
in the standard fields.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/mac80211.h | 53 +++++++++++++++++++++++++++++++++++++-------------
 net/mac80211/rx.c      | 42 ++++++++++++++++++++++++++++++++++++++-
 2 files changed, 80 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index d9cef31f4cbf..edc235c51a68 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -676,21 +676,41 @@ ieee80211_tx_info_clear_status(struct ieee80211_tx_info *info)
  * @RX_FLAG_HT_GF: This frame was received in a HT-greenfield transmission, if
  *	the driver fills this value it should add %IEEE80211_RADIOTAP_MCS_HAVE_FMT
  *	to hw.radiotap_mcs_details to advertise that fact
+ * @RX_FLAG_AMPDU_DETAILS: A-MPDU details are known, in particular the reference
+ *	number (@ampdu_reference) must be populated and be a distinct number for
+ *	each A-MPDU
+ * @RX_FLAG_AMPDU_REPORT_ZEROLEN: driver reports 0-length subframes
+ * @RX_FLAG_AMPDU_IS_ZEROLEN: This is a zero-length subframe, for
+ *	monitoring purposes only
+ * @RX_FLAG_AMPDU_LAST_KNOWN: last subframe is known, should be set on all
+ *	subframes of a single A-MPDU
+ * @RX_FLAG_AMPDU_IS_LAST: this subframe is the last subframe of the A-MPDU
+ * @RX_FLAG_AMPDU_DELIM_CRC_ERROR: A delimiter CRC error has been detected
+ *	on this subframe
+ * @RX_FLAG_AMPDU_DELIM_CRC_KNOWN: The delimiter CRC field is known (the CRC
+ *	is stored in the @ampdu_delimiter_crc field)
  */
 enum mac80211_rx_flags {
-	RX_FLAG_MMIC_ERROR	= 1<<0,
-	RX_FLAG_DECRYPTED	= 1<<1,
-	RX_FLAG_MMIC_STRIPPED	= 1<<3,
-	RX_FLAG_IV_STRIPPED	= 1<<4,
-	RX_FLAG_FAILED_FCS_CRC	= 1<<5,
-	RX_FLAG_FAILED_PLCP_CRC = 1<<6,
-	RX_FLAG_MACTIME_MPDU	= 1<<7,
-	RX_FLAG_SHORTPRE	= 1<<8,
-	RX_FLAG_HT		= 1<<9,
-	RX_FLAG_40MHZ		= 1<<10,
-	RX_FLAG_SHORT_GI	= 1<<11,
-	RX_FLAG_NO_SIGNAL_VAL	= 1<<12,
-	RX_FLAG_HT_GF		= 1<<13,
+	RX_FLAG_MMIC_ERROR		= BIT(0),
+	RX_FLAG_DECRYPTED		= BIT(1),
+	RX_FLAG_MMIC_STRIPPED		= BIT(3),
+	RX_FLAG_IV_STRIPPED		= BIT(4),
+	RX_FLAG_FAILED_FCS_CRC		= BIT(5),
+	RX_FLAG_FAILED_PLCP_CRC 	= BIT(6),
+	RX_FLAG_MACTIME_MPDU		= BIT(7),
+	RX_FLAG_SHORTPRE		= BIT(8),
+	RX_FLAG_HT			= BIT(9),
+	RX_FLAG_40MHZ			= BIT(10),
+	RX_FLAG_SHORT_GI		= BIT(11),
+	RX_FLAG_NO_SIGNAL_VAL		= BIT(12),
+	RX_FLAG_HT_GF			= BIT(13),
+	RX_FLAG_AMPDU_DETAILS		= BIT(14),
+	RX_FLAG_AMPDU_REPORT_ZEROLEN	= BIT(15),
+	RX_FLAG_AMPDU_IS_ZEROLEN	= BIT(16),
+	RX_FLAG_AMPDU_LAST_KNOWN	= BIT(17),
+	RX_FLAG_AMPDU_IS_LAST		= BIT(18),
+	RX_FLAG_AMPDU_DELIM_CRC_ERROR	= BIT(19),
+	RX_FLAG_AMPDU_DELIM_CRC_KNOWN	= BIT(20),
 };
 
 /**
@@ -714,17 +734,22 @@ enum mac80211_rx_flags {
  *	HT rates are use (RX_FLAG_HT)
  * @flag: %RX_FLAG_*
  * @rx_flags: internal RX flags for mac80211
+ * @ampdu_reference: A-MPDU reference number, must be a different value for
+ *	each A-MPDU but the same for each subframe within one A-MPDU
+ * @ampdu_delimiter_crc: A-MPDU delimiter CRC
  */
 struct ieee80211_rx_status {
 	u64 mactime;
 	u32 device_timestamp;
-	u16 flag;
+	u32 ampdu_reference;
+	u32 flag;
 	u16 freq;
 	u8 rate_idx;
 	u8 rx_flags;
 	u8 band;
 	u8 antenna;
 	s8 signal;
+	u8 ampdu_delimiter_crc;
 };
 
 /**
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 0cb4edee6af5..78bf6c7e80c8 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -60,7 +60,9 @@ static inline int should_drop_frame(struct sk_buff *skb,
 	struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
 	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
 
-	if (status->flag & (RX_FLAG_FAILED_FCS_CRC | RX_FLAG_FAILED_PLCP_CRC))
+	if (status->flag & (RX_FLAG_FAILED_FCS_CRC |
+			    RX_FLAG_FAILED_PLCP_CRC |
+			    RX_FLAG_AMPDU_IS_ZEROLEN))
 		return 1;
 	if (unlikely(skb->len < 16 + present_fcs_len))
 		return 1;
@@ -91,6 +93,13 @@ ieee80211_rx_radiotap_len(struct ieee80211_local *local,
 	if (status->flag & RX_FLAG_HT) /* HT info */
 		len += 3;
 
+	if (status->flag & RX_FLAG_AMPDU_DETAILS) {
+		/* padding */
+		while (len & 3)
+			len++;
+		len += 8;
+	}
+
 	return len;
 }
 
@@ -215,6 +224,37 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local,
 		pos++;
 		*pos++ = status->rate_idx;
 	}
+
+	if (status->flag & RX_FLAG_AMPDU_DETAILS) {
+		u16 flags = 0;
+
+		/* ensure 4 byte alignment */
+		while ((pos - (u8 *)rthdr) & 3)
+			pos++;
+		rthdr->it_present |=
+			cpu_to_le32(1 << IEEE80211_RADIOTAP_AMPDU_STATUS);
+		put_unaligned_le32(status->ampdu_reference, pos);
+		pos += 4;
+		if (status->flag & RX_FLAG_AMPDU_REPORT_ZEROLEN)
+			flags |= IEEE80211_RADIOTAP_AMPDU_REPORT_ZEROLEN;
+		if (status->flag & RX_FLAG_AMPDU_IS_ZEROLEN)
+			flags |= IEEE80211_RADIOTAP_AMPDU_IS_ZEROLEN;
+		if (status->flag & RX_FLAG_AMPDU_LAST_KNOWN)
+			flags |= IEEE80211_RADIOTAP_AMPDU_LAST_KNOWN;
+		if (status->flag & RX_FLAG_AMPDU_IS_LAST)
+			flags |= IEEE80211_RADIOTAP_AMPDU_IS_LAST;
+		if (status->flag & RX_FLAG_AMPDU_DELIM_CRC_ERROR)
+			flags |= IEEE80211_RADIOTAP_AMPDU_DELIM_CRC_ERR;
+		if (status->flag & RX_FLAG_AMPDU_DELIM_CRC_KNOWN)
+			flags |= IEEE80211_RADIOTAP_AMPDU_DELIM_CRC_KNOWN;
+		put_unaligned_le16(flags, pos);
+		pos += 2;
+		if (status->flag & RX_FLAG_AMPDU_DELIM_CRC_KNOWN)
+			*pos++ = status->ampdu_delimiter_crc;
+		else
+			*pos++ = 0;
+		*pos++ = 0;
+	}
 }
 
 /*
-- 
cgit v1.2.3


From 98104fdeda63d57631c9f89e90a7b83b58fcee40 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Sat, 16 Jun 2012 00:19:54 +0200
Subject: cfg80211: add P2P Device abstraction

In order to support using a different MAC address
for the P2P Device address we must first have a
P2P Device abstraction that can be assigned a MAC
address.

This abstraction will also be useful to support
offloading P2P operations to the device, e.g.
periodic listen for discoverability.

Currently, the driver is responsible for assigning
a MAC address to the P2P Device, but this could be
changed by allowing a MAC address to be given to
the NEW_INTERFACE command.

As it has no associated netdev, a P2P Device can
only be identified by its wdev identifier but the
previous patches allowed using the wdev identifier
in various APIs, e.g. remain-on-channel.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/linux/nl80211.h |  30 ++++++++++--
 include/net/cfg80211.h  |  40 +++++++++++++++-
 net/mac80211/iface.c    |   3 ++
 net/mac80211/util.c     |   2 +
 net/wireless/chan.c     |   7 ++-
 net/wireless/core.c     |  53 ++++++++++++++++++++-
 net/wireless/mlme.c     |  10 ++--
 net/wireless/nl80211.c  | 122 +++++++++++++++++++++++++++++++++++++++++++++---
 net/wireless/util.c     |  18 ++++++-
 9 files changed, 265 insertions(+), 20 deletions(-)

(limited to 'include')

diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
index 2f3878806403..458416279347 100644
--- a/include/linux/nl80211.h
+++ b/include/linux/nl80211.h
@@ -565,6 +565,14 @@
  *	%NL80211_ATTR_IFINDEX is now on %NL80211_ATTR_WIPHY_FREQ with
  *	%NL80211_ATTR_WIPHY_CHANNEL_TYPE.
  *
+ * @NL80211_CMD_START_P2P_DEVICE: Start the given P2P Device, identified by
+ *	its %NL80211_ATTR_WDEV identifier. It must have been created with
+ *	%NL80211_CMD_NEW_INTERFACE previously. After it has been started, the
+ *	P2P Device can be used for P2P operations, e.g. remain-on-channel and
+ *	public action frame TX.
+ * @NL80211_CMD_STOP_P2P_DEVICE: Stop the given P2P Device, identified by
+ *	its %NL80211_ATTR_WDEV identifier.
+ *
  * @NL80211_CMD_MAX: highest used command number
  * @__NL80211_CMD_AFTER_LAST: internal use
  */
@@ -708,6 +716,9 @@ enum nl80211_commands {
 
 	NL80211_CMD_CH_SWITCH_NOTIFY,
 
+	NL80211_CMD_START_P2P_DEVICE,
+	NL80211_CMD_STOP_P2P_DEVICE,
+
 	/* add new commands above here */
 
 	/* used to define NL80211_CMD_MAX below */
@@ -1575,6 +1586,10 @@ enum nl80211_attrs {
  * @NL80211_IFTYPE_MESH_POINT: mesh point
  * @NL80211_IFTYPE_P2P_CLIENT: P2P client
  * @NL80211_IFTYPE_P2P_GO: P2P group owner
+ * @NL80211_IFTYPE_P2P_DEVICE: P2P device interface type, this is not a netdev
+ *	and therefore can't be created in the normal ways, use the
+ *	%NL80211_CMD_START_P2P_DEVICE and %NL80211_CMD_STOP_P2P_DEVICE
+ *	commands to create and destroy one
  * @NL80211_IFTYPE_MAX: highest interface type number currently defined
  * @NUM_NL80211_IFTYPES: number of defined interface types
  *
@@ -1593,6 +1608,7 @@ enum nl80211_iftype {
 	NL80211_IFTYPE_MESH_POINT,
 	NL80211_IFTYPE_P2P_CLIENT,
 	NL80211_IFTYPE_P2P_GO,
+	NL80211_IFTYPE_P2P_DEVICE,
 
 	/* keep last */
 	NUM_NL80211_IFTYPES,
@@ -2994,12 +3010,18 @@ enum nl80211_ap_sme_features {
  * @NL80211_FEATURE_CELL_BASE_REG_HINTS: This driver has been tested
  *	to work properly to suppport receiving regulatory hints from
  *	cellular base stations.
+ * @NL80211_FEATURE_P2P_DEVICE_NEEDS_CHANNEL: If this is set, an active
+ *	P2P Device (%NL80211_IFTYPE_P2P_DEVICE) requires its own channel
+ *	in the interface combinations, even when it's only used for scan
+ *	and remain-on-channel. This could be due to, for example, the
+ *	remain-on-channel implementation requiring a channel context.
  */
 enum nl80211_feature_flags {
-	NL80211_FEATURE_SK_TX_STATUS	= 1 << 0,
-	NL80211_FEATURE_HT_IBSS		= 1 << 1,
-	NL80211_FEATURE_INACTIVITY_TIMER = 1 << 2,
-	NL80211_FEATURE_CELL_BASE_REG_HINTS = 1 << 3,
+	NL80211_FEATURE_SK_TX_STATUS			= 1 << 0,
+	NL80211_FEATURE_HT_IBSS				= 1 << 1,
+	NL80211_FEATURE_INACTIVITY_TIMER		= 1 << 2,
+	NL80211_FEATURE_CELL_BASE_REG_HINTS		= 1 << 3,
+	NL80211_FEATURE_P2P_DEVICE_NEEDS_CHANNEL	= 1 << 4,
 };
 
 /**
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 493fa0c79005..4c518f1f1aca 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -1437,7 +1437,8 @@ struct cfg80211_gtk_rekey_data {
  * @add_virtual_intf: create a new virtual interface with the given name,
  *	must set the struct wireless_dev's iftype. Beware: You must create
  *	the new netdev in the wiphy's network namespace! Returns the struct
- *	wireless_dev, or an ERR_PTR.
+ *	wireless_dev, or an ERR_PTR. For P2P device wdevs, the driver must
+ *	also set the address member in the wdev.
  *
  * @del_virtual_intf: remove the virtual interface
  *
@@ -1616,6 +1617,9 @@ struct cfg80211_gtk_rekey_data {
  * @get_channel: Get the current operating channel for the virtual interface.
  *	For monitor interfaces, it should return %NULL unless there's a single
  *	current monitoring channel.
+ *
+ * @start_p2p_device: Start the given P2P device.
+ * @stop_p2p_device: Stop the given P2P device.
  */
 struct cfg80211_ops {
 	int	(*suspend)(struct wiphy *wiphy, struct cfg80211_wowlan *wow);
@@ -1832,6 +1836,11 @@ struct cfg80211_ops {
 		(*get_channel)(struct wiphy *wiphy,
 			       struct wireless_dev *wdev,
 			       enum nl80211_channel_type *type);
+
+	int	(*start_p2p_device)(struct wiphy *wiphy,
+				    struct wireless_dev *wdev);
+	void	(*stop_p2p_device)(struct wiphy *wiphy,
+				   struct wireless_dev *wdev);
 };
 
 /*
@@ -2395,6 +2404,8 @@ struct cfg80211_cached_keys;
  * @cleanup_work: work struct used for cleanup that can't be done directly
  * @beacon_interval: beacon interval used on this device for transmitting
  *	beacons, 0 when not valid
+ * @address: The address for this device, valid only if @netdev is %NULL
+ * @p2p_started: true if this is a P2P Device that has been started
  */
 struct wireless_dev {
 	struct wiphy *wiphy;
@@ -2413,7 +2424,9 @@ struct wireless_dev {
 
 	struct work_struct cleanup_work;
 
-	bool use_4addr;
+	bool use_4addr, p2p_started;
+
+	u8 address[ETH_ALEN] __aligned(sizeof(u16));
 
 	/* currently used for IBSS and SME - might be rearranged later */
 	u8 ssid[IEEE80211_MAX_SSID_LEN];
@@ -2461,6 +2474,13 @@ struct wireless_dev {
 #endif
 };
 
+static inline u8 *wdev_address(struct wireless_dev *wdev)
+{
+	if (wdev->netdev)
+		return wdev->netdev->dev_addr;
+	return wdev->address;
+}
+
 /**
  * wdev_priv - return wiphy priv from wireless_dev
  *
@@ -3528,6 +3548,22 @@ void cfg80211_ch_switch_notify(struct net_device *dev, int freq,
  */
 u32 cfg80211_calculate_bitrate(struct rate_info *rate);
 
+/**
+ * cfg80211_unregister_wdev - remove the given wdev
+ * @wdev: struct wireless_dev to remove
+ *
+ * Call this function only for wdevs that have no netdev assigned,
+ * e.g. P2P Devices. It removes the device from the list so that
+ * it can no longer be used. It is necessary to call this function
+ * even when cfg80211 requests the removal of the interface by
+ * calling the del_virtual_intf() callback. The function must also
+ * be called when the driver wishes to unregister the wdev, e.g.
+ * when the device is unbound from the driver.
+ *
+ * Requires the RTNL to be held.
+ */
+void cfg80211_unregister_wdev(struct wireless_dev *wdev);
+
 /* Logging, debugging and troubleshooting/diagnostic helpers. */
 
 /* wiphy_printk helpers, similar to dev_printk */
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index fbab7a84ca21..366d9d3e84c4 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -449,6 +449,7 @@ static int ieee80211_do_open(struct net_device *dev, bool coming_up)
 	case NUM_NL80211_IFTYPES:
 	case NL80211_IFTYPE_P2P_CLIENT:
 	case NL80211_IFTYPE_P2P_GO:
+	case NL80211_IFTYPE_P2P_DEVICE:
 		/* cannot happen */
 		WARN_ON(1);
 		break;
@@ -1146,6 +1147,8 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata,
 	case NL80211_IFTYPE_WDS:
 	case NL80211_IFTYPE_AP_VLAN:
 		break;
+	case NL80211_IFTYPE_P2P_DEVICE:
+		/* not yet supported */
 	case NL80211_IFTYPE_UNSPECIFIED:
 	case NUM_NL80211_IFTYPES:
 		BUG();
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 7dff94e43a0c..9a4e4e30ea6c 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -1390,6 +1390,8 @@ int ieee80211_reconfig(struct ieee80211_local *local)
 		case NL80211_IFTYPE_MONITOR:
 			/* ignore virtual */
 			break;
+		case NL80211_IFTYPE_P2P_DEVICE:
+			/* not yet supported */
 		case NL80211_IFTYPE_UNSPECIFIED:
 		case NUM_NL80211_IFTYPES:
 		case NL80211_IFTYPE_P2P_CLIENT:
diff --git a/net/wireless/chan.c b/net/wireless/chan.c
index d355f67d0cdd..2f876b9ee344 100644
--- a/net/wireless/chan.c
+++ b/net/wireless/chan.c
@@ -105,7 +105,7 @@ cfg80211_get_chan_state(struct wireless_dev *wdev,
 
 	ASSERT_WDEV_LOCK(wdev);
 
-	if (!netif_running(wdev->netdev))
+	if (wdev->netdev && !netif_running(wdev->netdev))
 		return;
 
 	switch (wdev->iftype) {
@@ -143,6 +143,11 @@ cfg80211_get_chan_state(struct wireless_dev *wdev,
 	case NL80211_IFTYPE_WDS:
 		/* these interface types don't really have a channel */
 		return;
+	case NL80211_IFTYPE_P2P_DEVICE:
+		if (wdev->wiphy->features &
+				NL80211_FEATURE_P2P_DEVICE_NEEDS_CHANNEL)
+			*chanmode = CHAN_MODE_EXCLUSIVE;
+		return;
 	case NL80211_IFTYPE_UNSPECIFIED:
 	case NUM_NL80211_IFTYPES:
 		WARN_ON(1);
diff --git a/net/wireless/core.c b/net/wireless/core.c
index 31b40cc4a9c3..91b300443f4b 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -230,9 +230,24 @@ static int cfg80211_rfkill_set_block(void *data, bool blocked)
 	rtnl_lock();
 	mutex_lock(&rdev->devlist_mtx);
 
-	list_for_each_entry(wdev, &rdev->wdev_list, list)
-		if (wdev->netdev)
+	list_for_each_entry(wdev, &rdev->wdev_list, list) {
+		if (wdev->netdev) {
 			dev_close(wdev->netdev);
+			continue;
+		}
+		/* otherwise, check iftype */
+		switch (wdev->iftype) {
+		case NL80211_IFTYPE_P2P_DEVICE:
+			if (!wdev->p2p_started)
+				break;
+			rdev->ops->stop_p2p_device(&rdev->wiphy, wdev);
+			wdev->p2p_started = false;
+			rdev->opencount--;
+			break;
+		default:
+			break;
+		}
+	}
 
 	mutex_unlock(&rdev->devlist_mtx);
 	rtnl_unlock();
@@ -407,6 +422,11 @@ static int wiphy_verify_combinations(struct wiphy *wiphy)
 			if (WARN_ON(wiphy->software_iftypes & types))
 				return -EINVAL;
 
+			/* Only a single P2P_DEVICE can be allowed */
+			if (WARN_ON(types & BIT(NL80211_IFTYPE_P2P_DEVICE) &&
+				    c->limits[j].max > 1))
+				return -EINVAL;
+
 			cnt += c->limits[j].max;
 			/*
 			 * Don't advertise an unsupported type
@@ -734,6 +754,35 @@ static void wdev_cleanup_work(struct work_struct *work)
 	dev_put(wdev->netdev);
 }
 
+void cfg80211_unregister_wdev(struct wireless_dev *wdev)
+{
+	struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+
+	ASSERT_RTNL();
+
+	if (WARN_ON(wdev->netdev))
+		return;
+
+	mutex_lock(&rdev->devlist_mtx);
+	list_del_rcu(&wdev->list);
+	rdev->devlist_generation++;
+
+	switch (wdev->iftype) {
+	case NL80211_IFTYPE_P2P_DEVICE:
+		if (!wdev->p2p_started)
+			break;
+		rdev->ops->stop_p2p_device(&rdev->wiphy, wdev);
+		wdev->p2p_started = false;
+		rdev->opencount--;
+		break;
+	default:
+		WARN_ON_ONCE(1);
+		break;
+	}
+	mutex_unlock(&rdev->devlist_mtx);
+}
+EXPORT_SYMBOL(cfg80211_unregister_wdev);
+
 static struct device_type wiphy_type = {
 	.name	= "wlan",
 };
diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c
index 1cdb1d5e6b0f..8fd0242ee169 100644
--- a/net/wireless/mlme.c
+++ b/net/wireless/mlme.c
@@ -736,7 +736,6 @@ int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev,
 			  const u8 *buf, size_t len, bool no_cck,
 			  bool dont_wait_for_ack, u64 *cookie)
 {
-	struct net_device *dev = wdev->netdev;
 	const struct ieee80211_mgmt *mgmt;
 	u16 stype;
 
@@ -796,7 +795,7 @@ int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev,
 		case NL80211_IFTYPE_AP:
 		case NL80211_IFTYPE_P2P_GO:
 		case NL80211_IFTYPE_AP_VLAN:
-			if (!ether_addr_equal(mgmt->bssid, dev->dev_addr))
+			if (!ether_addr_equal(mgmt->bssid, wdev_address(wdev)))
 				err = -EINVAL;
 			break;
 		case NL80211_IFTYPE_MESH_POINT:
@@ -809,6 +808,11 @@ int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev,
 			 * cfg80211 doesn't track the stations
 			 */
 			break;
+		case NL80211_IFTYPE_P2P_DEVICE:
+			/*
+			 * fall through, P2P device only supports
+			 * public action frames
+			 */
 		default:
 			err = -EOPNOTSUPP;
 			break;
@@ -819,7 +823,7 @@ int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev,
 			return err;
 	}
 
-	if (!ether_addr_equal(mgmt->sa, dev->dev_addr))
+	if (!ether_addr_equal(mgmt->sa, wdev_address(wdev)))
 		return -EINVAL;
 
 	/* Transmit the Action frame as requested by user space */
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 97026f3b215a..787aeaa902fe 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -1100,6 +1100,7 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags,
 		if (nla_put_u32(msg, i, NL80211_CMD_REGISTER_BEACONS))
 			goto nla_put_failure;
 	}
+	CMD(start_p2p_device, START_P2P_DEVICE);
 
 #ifdef CONFIG_NL80211_TESTMODE
 	CMD(testmode_cmd, TESTMODE);
@@ -1748,13 +1749,13 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 pid, u32 seq, int flags,
 
 	if (dev &&
 	    (nla_put_u32(msg, NL80211_ATTR_IFINDEX, dev->ifindex) ||
-	     nla_put_string(msg, NL80211_ATTR_IFNAME, dev->name) ||
-	     nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, dev->dev_addr)))
+	     nla_put_string(msg, NL80211_ATTR_IFNAME, dev->name)))
 		goto nla_put_failure;
 
 	if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) ||
 	    nla_put_u32(msg, NL80211_ATTR_IFTYPE, wdev->iftype) ||
 	    nla_put_u64(msg, NL80211_ATTR_WDEV, wdev_id(wdev)) ||
+	    nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, wdev_address(wdev)) ||
 	    nla_put_u32(msg, NL80211_ATTR_GENERATION,
 			rdev->devlist_generation ^
 			(cfg80211_rdev_list_generation << 2)))
@@ -2021,8 +2022,10 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info)
 		return PTR_ERR(wdev);
 	}
 
-	if (type == NL80211_IFTYPE_MESH_POINT &&
-	    info->attrs[NL80211_ATTR_MESH_ID]) {
+	switch (type) {
+	case NL80211_IFTYPE_MESH_POINT:
+		if (!info->attrs[NL80211_ATTR_MESH_ID])
+			break;
 		wdev_lock(wdev);
 		BUILD_BUG_ON(IEEE80211_MAX_SSID_LEN !=
 			     IEEE80211_MAX_MESH_ID_LEN);
@@ -2031,6 +2034,26 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info)
 		memcpy(wdev->ssid, nla_data(info->attrs[NL80211_ATTR_MESH_ID]),
 		       wdev->mesh_id_up_len);
 		wdev_unlock(wdev);
+		break;
+	case NL80211_IFTYPE_P2P_DEVICE:
+		/*
+		 * P2P Device doesn't have a netdev, so doesn't go
+		 * through the netdev notifier and must be added here
+		 */
+		mutex_init(&wdev->mtx);
+		INIT_LIST_HEAD(&wdev->event_list);
+		spin_lock_init(&wdev->event_lock);
+		INIT_LIST_HEAD(&wdev->mgmt_registrations);
+		spin_lock_init(&wdev->mgmt_registrations_lock);
+
+		mutex_lock(&rdev->devlist_mtx);
+		wdev->identifier = ++rdev->wdev_id;
+		list_add_rcu(&wdev->list, &rdev->wdev_list);
+		rdev->devlist_generation++;
+		mutex_unlock(&rdev->devlist_mtx);
+		break;
+	default:
+		break;
 	}
 
 	if (nl80211_send_iface(msg, info->snd_pid, info->snd_seq, 0,
@@ -6053,6 +6076,7 @@ static int nl80211_register_mgmt(struct sk_buff *skb, struct genl_info *info)
 	case NL80211_IFTYPE_AP_VLAN:
 	case NL80211_IFTYPE_MESH_POINT:
 	case NL80211_IFTYPE_P2P_GO:
+	case NL80211_IFTYPE_P2P_DEVICE:
 		break;
 	default:
 		return -EOPNOTSUPP;
@@ -6099,6 +6123,7 @@ static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info)
 	case NL80211_IFTYPE_AP_VLAN:
 	case NL80211_IFTYPE_MESH_POINT:
 	case NL80211_IFTYPE_P2P_GO:
+	case NL80211_IFTYPE_P2P_DEVICE:
 		break;
 	default:
 		return -EOPNOTSUPP;
@@ -6195,6 +6220,7 @@ static int nl80211_tx_mgmt_cancel_wait(struct sk_buff *skb, struct genl_info *in
 	case NL80211_IFTYPE_AP:
 	case NL80211_IFTYPE_AP_VLAN:
 	case NL80211_IFTYPE_P2P_GO:
+	case NL80211_IFTYPE_P2P_DEVICE:
 		break;
 	default:
 		return -EOPNOTSUPP;
@@ -6810,6 +6836,68 @@ static int nl80211_register_beacons(struct sk_buff *skb, struct genl_info *info)
 	return 0;
 }
 
+static int nl80211_start_p2p_device(struct sk_buff *skb, struct genl_info *info)
+{
+	struct cfg80211_registered_device *rdev = info->user_ptr[0];
+	struct wireless_dev *wdev = info->user_ptr[1];
+	int err;
+
+	if (!rdev->ops->start_p2p_device)
+		return -EOPNOTSUPP;
+
+	if (wdev->iftype != NL80211_IFTYPE_P2P_DEVICE)
+		return -EOPNOTSUPP;
+
+	if (wdev->p2p_started)
+		return 0;
+
+	mutex_lock(&rdev->devlist_mtx);
+	err = cfg80211_can_add_interface(rdev, wdev->iftype);
+	mutex_unlock(&rdev->devlist_mtx);
+	if (err)
+		return err;
+
+	err = rdev->ops->start_p2p_device(&rdev->wiphy, wdev);
+	if (err)
+		return err;
+
+	wdev->p2p_started = true;
+	mutex_lock(&rdev->devlist_mtx);
+	rdev->opencount++;
+	mutex_unlock(&rdev->devlist_mtx);
+
+	return 0;
+}
+
+static int nl80211_stop_p2p_device(struct sk_buff *skb, struct genl_info *info)
+{
+	struct cfg80211_registered_device *rdev = info->user_ptr[0];
+	struct wireless_dev *wdev = info->user_ptr[1];
+
+	if (wdev->iftype != NL80211_IFTYPE_P2P_DEVICE)
+		return -EOPNOTSUPP;
+
+	if (!rdev->ops->stop_p2p_device)
+		return -EOPNOTSUPP;
+
+	if (!wdev->p2p_started)
+		return 0;
+
+	rdev->ops->stop_p2p_device(&rdev->wiphy, wdev);
+	wdev->p2p_started = false;
+
+	mutex_lock(&rdev->devlist_mtx);
+	rdev->opencount--;
+	mutex_unlock(&rdev->devlist_mtx);
+
+	if (WARN_ON(rdev->scan_req && rdev->scan_req->wdev == wdev)) {
+		rdev->scan_req->aborted = true;
+		___cfg80211_scan_done(rdev, true);
+	}
+
+	return 0;
+}
+
 #define NL80211_FLAG_NEED_WIPHY		0x01
 #define NL80211_FLAG_NEED_NETDEV	0x02
 #define NL80211_FLAG_NEED_RTNL		0x04
@@ -6817,7 +6905,7 @@ static int nl80211_register_beacons(struct sk_buff *skb, struct genl_info *info)
 #define NL80211_FLAG_NEED_NETDEV_UP	(NL80211_FLAG_NEED_NETDEV |\
 					 NL80211_FLAG_CHECK_NETDEV_UP)
 #define NL80211_FLAG_NEED_WDEV		0x10
-/* If a netdev is associated, it must be UP */
+/* If a netdev is associated, it must be UP, P2P must be started */
 #define NL80211_FLAG_NEED_WDEV_UP	(NL80211_FLAG_NEED_WDEV |\
 					 NL80211_FLAG_CHECK_NETDEV_UP)
 
@@ -6878,6 +6966,13 @@ static int nl80211_pre_doit(struct genl_ops *ops, struct sk_buff *skb,
 			}
 
 			dev_hold(dev);
+		} else if (ops->internal_flags & NL80211_FLAG_CHECK_NETDEV_UP) {
+			if (!wdev->p2p_started) {
+				mutex_unlock(&cfg80211_mutex);
+				if (rtnl)
+					rtnl_unlock();
+				return -ENETDOWN;
+			}
 		}
 
 		cfg80211_lock_rdev(rdev);
@@ -7439,7 +7534,22 @@ static struct genl_ops nl80211_ops[] = {
 		.internal_flags = NL80211_FLAG_NEED_NETDEV |
 				  NL80211_FLAG_NEED_RTNL,
 	},
-
+	{
+		.cmd = NL80211_CMD_START_P2P_DEVICE,
+		.doit = nl80211_start_p2p_device,
+		.policy = nl80211_policy,
+		.flags = GENL_ADMIN_PERM,
+		.internal_flags = NL80211_FLAG_NEED_WDEV |
+				  NL80211_FLAG_NEED_RTNL,
+	},
+	{
+		.cmd = NL80211_CMD_STOP_P2P_DEVICE,
+		.doit = nl80211_stop_p2p_device,
+		.policy = nl80211_policy,
+		.flags = GENL_ADMIN_PERM,
+		.internal_flags = NL80211_FLAG_NEED_WDEV_UP |
+				  NL80211_FLAG_NEED_RTNL,
+	},
 };
 
 static struct genl_multicast_group nl80211_mlme_mcgrp = {
diff --git a/net/wireless/util.c b/net/wireless/util.c
index ce393dd8c928..d7b672262b5f 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -800,6 +800,10 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev,
 	if (otype == NL80211_IFTYPE_AP_VLAN)
 		return -EOPNOTSUPP;
 
+	/* cannot change into P2P device type */
+	if (ntype == NL80211_IFTYPE_P2P_DEVICE)
+		return -EOPNOTSUPP;
+
 	if (!rdev->ops->change_virtual_intf ||
 	    !(rdev->wiphy.interface_modes & (1 << ntype)))
 		return -EOPNOTSUPP;
@@ -877,6 +881,9 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev,
 		case NUM_NL80211_IFTYPES:
 			/* not happening */
 			break;
+		case NL80211_IFTYPE_P2P_DEVICE:
+			WARN_ON(1);
+			break;
 		}
 	}
 
@@ -1041,8 +1048,15 @@ int cfg80211_can_use_iftype_chan(struct cfg80211_registered_device *rdev,
 	list_for_each_entry(wdev_iter, &rdev->wdev_list, list) {
 		if (wdev_iter == wdev)
 			continue;
-		if (!netif_running(wdev_iter->netdev))
-			continue;
+		if (wdev_iter->netdev) {
+			if (!netif_running(wdev_iter->netdev))
+				continue;
+		} else if (wdev_iter->iftype == NL80211_IFTYPE_P2P_DEVICE) {
+			if (!wdev_iter->p2p_started)
+				continue;
+		} else {
+			WARN_ON(1);
+		}
 
 		if (rdev->wiphy.software_iftypes & BIT(wdev_iter->iftype))
 			continue;
-- 
cgit v1.2.3


From 6d71117a279aa30574a8af6c7348570c292285c2 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Tue, 19 Jun 2012 17:19:44 +0200
Subject: mac80211: add IEEE80211_HW_P2P_DEV_ADDR_FOR_INTF

Some devices like the current iwlwifi implementation
require that the P2P interface address match the P2P
Device address (only one P2P interface is supported.)
Add the HW flag IEEE80211_HW_P2P_DEV_ADDR_FOR_INTF
that allows drivers to request that P2P Interfaces
added while a P2P Device is active get the same MAC
address by default.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/mac80211.h |  5 +++++
 net/mac80211/iface.c   | 15 ++++++++++++++-
 2 files changed, 19 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index edc235c51a68..71f8262fc1df 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -1241,6 +1241,10 @@ struct ieee80211_tx_control {
  *	queue mapping in order to use different queues (not just one per AC)
  *	for different virtual interfaces. See the doc section on HW queue
  *	control for more details.
+ *
+ * @IEEE80211_HW_P2P_DEV_ADDR_FOR_INTF: Use the P2P Device address for any
+ *	P2P Interface. This will be honoured even if more than one interface
+ *	is supported.
  */
 enum ieee80211_hw_flags {
 	IEEE80211_HW_HAS_RATE_CONTROL			= 1<<0,
@@ -1268,6 +1272,7 @@ enum ieee80211_hw_flags {
 	IEEE80211_HW_AP_LINK_PS				= 1<<22,
 	IEEE80211_HW_TX_AMPDU_SETUP_IN_HW		= 1<<23,
 	IEEE80211_HW_SCAN_WHILE_IDLE			= 1<<24,
+	IEEE80211_HW_P2P_DEV_ADDR_FOR_INTF		= 1<<25,
 };
 
 /**
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index 152aeea14c85..59f8adc2aa5f 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -1313,7 +1313,6 @@ static void ieee80211_assign_perm_addr(struct ieee80211_local *local,
 	    local->hw.wiphy->n_addresses <= 1)
 		return;
 
-
 	mutex_lock(&local->iflist_mtx);
 
 	switch (type) {
@@ -1331,6 +1330,19 @@ static void ieee80211_assign_perm_addr(struct ieee80211_local *local,
 		}
 		/* keep default if no AP interface present */
 		break;
+	case NL80211_IFTYPE_P2P_CLIENT:
+	case NL80211_IFTYPE_P2P_GO:
+		if (local->hw.flags & IEEE80211_HW_P2P_DEV_ADDR_FOR_INTF) {
+			list_for_each_entry(sdata, &local->interfaces, list) {
+				if (sdata->vif.type != NL80211_IFTYPE_P2P_DEVICE)
+					continue;
+				if (!ieee80211_sdata_running(sdata))
+					continue;
+				memcpy(perm_addr, sdata->vif.addr, ETH_ALEN);
+				goto out_unlock;
+			}
+		}
+		/* otherwise fall through */
 	default:
 		/* assign a new address if possible -- try n_addresses first */
 		for (i = 0; i < local->hw.wiphy->n_addresses; i++) {
@@ -1405,6 +1417,7 @@ static void ieee80211_assign_perm_addr(struct ieee80211_local *local,
 		break;
 	}
 
+ out_unlock:
 	mutex_unlock(&local->iflist_mtx);
 }
 
-- 
cgit v1.2.3


From 6f6bbc186dc8e4e0c628db7decbd1a5e02cb5fd8 Mon Sep 17 00:00:00 2001
From: Jesse Brandeburg <jesse.brandeburg@intel.com>
Date: Thu, 26 Jul 2012 02:30:53 +0000
Subject: ethtool.h: MDI setting support

This change modifies the core ethtool struct to allow a driver to
support setting of MDI/MDI-X state for twisted pair wiring.  This
change uses a previously reserved u8 and should not change any
binary compatibility of ethtool.

Also as per Ben Hutchings' suggestion, the capabilities are
stored in a separate byte so the driver can report if it supports
changing settings.

see thread: http://kerneltrap.org/mailarchive/linux-netdev/2010/11/17/6289820/thread

see ethtool patches titled:
ethtool: allow setting MDI-X state

Signed-off-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
CC: Ben Hutchings <bhutchings@solarflare.com>
Tested-by: Aaron Brown aaron.f.brown@intel.com
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 include/linux/ethtool.h | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index 21eff418091b..fcb4f8e60c1c 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -45,8 +45,10 @@ struct ethtool_cmd {
 				 * bits) in Mbps. Please use
 				 * ethtool_cmd_speed()/_set() to
 				 * access it */
-	__u8	eth_tp_mdix;
-	__u8	reserved2;
+	__u8	eth_tp_mdix;	/* twisted pair MDI-X status */
+	__u8    eth_tp_mdix_ctrl; /* twisted pair MDI-X control, when set,
+				   * link should be renegotiated if necessary
+				   */
 	__u32	lp_advertising;	/* Features the link partner advertises */
 	__u32	reserved[2];
 };
@@ -1229,10 +1231,13 @@ struct ethtool_ops {
 #define AUTONEG_DISABLE		0x00
 #define AUTONEG_ENABLE		0x01
 
-/* Mode MDI or MDI-X */
-#define ETH_TP_MDI_INVALID	0x00
-#define ETH_TP_MDI		0x01
-#define ETH_TP_MDI_X		0x02
+/* MDI or MDI-X status/control - if MDI/MDI_X/AUTO is set then
+ * the driver is required to renegotiate link
+ */
+#define ETH_TP_MDI_INVALID	0x00 /* status: unknown; control: unsupported */
+#define ETH_TP_MDI		0x01 /* status: MDI;     control: force MDI */
+#define ETH_TP_MDI_X		0x02 /* status: MDI-X;   control: force MDI-X */
+#define ETH_TP_MDI_AUTO		0x03 /*                  control: auto-select */
 
 /* Wake-On-Lan options. */
 #define WAKE_PHY		(1 << 0)
-- 
cgit v1.2.3


From cdcba7c6508502cddb07c84a4d14d8f624e8f168 Mon Sep 17 00:00:00 2001
From: Mikel Astiz <mikel.astiz@bmw-carit.de>
Date: Thu, 9 Aug 2012 09:52:28 +0200
Subject: Bluetooth: Add more HCI error codes

Add more HCI error codes as defined in the specification.

Signed-off-by: Mikel Astiz <mikel.astiz@bmw-carit.de>
Acked-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Gustavo Padovan <gustavo.padovan@collabora.co.uk>
---
 include/net/bluetooth/hci.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index 23cf413e2acf..0f28f7052f82 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -302,8 +302,11 @@ enum {
 
 /* ---- HCI Error Codes ---- */
 #define HCI_ERROR_AUTH_FAILURE		0x05
+#define HCI_ERROR_CONNECTION_TIMEOUT	0x08
 #define HCI_ERROR_REJ_BAD_ADDR		0x0f
 #define HCI_ERROR_REMOTE_USER_TERM	0x13
+#define HCI_ERROR_REMOTE_LOW_RESOURCES	0x14
+#define HCI_ERROR_REMOTE_POWER_OFF	0x15
 #define HCI_ERROR_LOCAL_HOST_TERM	0x16
 #define HCI_ERROR_PAIRING_NOT_ALLOWED	0x18
 
-- 
cgit v1.2.3


From f0d6a0ea330617454032d6e2ed48759858a44427 Mon Sep 17 00:00:00 2001
From: Mikel Astiz <mikel.astiz@bmw-carit.de>
Date: Thu, 9 Aug 2012 09:52:30 +0200
Subject: Bluetooth: mgmt: Add device disconnect reason

MGMT_EV_DEVICE_DISCONNECTED will now expose the disconnection reason to
userland, distinguishing four possible values:

	0x00	Reason not known or unspecified
	0x01	Connection timeout
	0x02	Connection terminated by local host
	0x03	Connection terminated by remote host

Note that the local/remote distinction just determines which side
terminated the low-level connection, regardless of the disconnection of
the higher-level profiles.

This can sometimes be misleading and thus must be used with care. For
example, some hardware combinations would report a locally initiated
disconnection even if the user turned Bluetooth off in the remote side.

Signed-off-by: Mikel Astiz <mikel.astiz@bmw-carit.de>
Acked-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Gustavo Padovan <gustavo.padovan@collabora.co.uk>
---
 include/net/bluetooth/hci_core.h |  2 +-
 include/net/bluetooth/mgmt.h     |  9 +++++++++
 net/bluetooth/hci_event.c        | 26 +++++++++++++++++++++++---
 net/bluetooth/mgmt.c             |  9 +++++----
 4 files changed, 38 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 7267dafd7159..1bbc1091748c 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -1002,7 +1002,7 @@ int mgmt_device_connected(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type,
 			  u8 addr_type, u32 flags, u8 *name, u8 name_len,
 			  u8 *dev_class);
 int mgmt_device_disconnected(struct hci_dev *hdev, bdaddr_t *bdaddr,
-			     u8 link_type, u8 addr_type);
+			     u8 link_type, u8 addr_type, u8 reason);
 int mgmt_disconnect_failed(struct hci_dev *hdev, bdaddr_t *bdaddr,
 			   u8 link_type, u8 addr_type, u8 status);
 int mgmt_connect_failed(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type,
diff --git a/include/net/bluetooth/mgmt.h b/include/net/bluetooth/mgmt.h
index 4348ee8bda69..1b48effcd973 100644
--- a/include/net/bluetooth/mgmt.h
+++ b/include/net/bluetooth/mgmt.h
@@ -405,7 +405,16 @@ struct mgmt_ev_device_connected {
 	__u8	eir[0];
 } __packed;
 
+#define MGMT_DEV_DISCONN_UNKNOWN	0x00
+#define MGMT_DEV_DISCONN_TIMEOUT	0x01
+#define MGMT_DEV_DISCONN_LOCAL_HOST	0x02
+#define MGMT_DEV_DISCONN_REMOTE		0x03
+
 #define MGMT_EV_DEVICE_DISCONNECTED	0x000C
+struct mgmt_ev_device_disconnected {
+	struct mgmt_addr_info addr;
+	__u8	reason;
+} __packed;
 
 #define MGMT_EV_CONNECT_FAILED		0x000D
 struct mgmt_ev_connect_failed {
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index bfa9bcc0f5ef..48d730228c2f 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -29,6 +29,7 @@
 
 #include <net/bluetooth/bluetooth.h>
 #include <net/bluetooth/hci_core.h>
+#include <net/bluetooth/mgmt.h>
 
 /* Handle HCI Event packets */
 
@@ -1875,6 +1876,22 @@ static void hci_conn_request_evt(struct hci_dev *hdev, struct sk_buff *skb)
 	}
 }
 
+static u8 hci_to_mgmt_reason(u8 err)
+{
+	switch (err) {
+	case HCI_ERROR_CONNECTION_TIMEOUT:
+		return MGMT_DEV_DISCONN_TIMEOUT;
+	case HCI_ERROR_REMOTE_USER_TERM:
+	case HCI_ERROR_REMOTE_LOW_RESOURCES:
+	case HCI_ERROR_REMOTE_POWER_OFF:
+		return MGMT_DEV_DISCONN_REMOTE;
+	case HCI_ERROR_LOCAL_HOST_TERM:
+		return MGMT_DEV_DISCONN_LOCAL_HOST;
+	default:
+		return MGMT_DEV_DISCONN_UNKNOWN;
+	}
+}
+
 static void hci_disconn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
 {
 	struct hci_ev_disconn_complete *ev = (void *) skb->data;
@@ -1893,12 +1910,15 @@ static void hci_disconn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
 
 	if (test_and_clear_bit(HCI_CONN_MGMT_CONNECTED, &conn->flags) &&
 	    (conn->type == ACL_LINK || conn->type == LE_LINK)) {
-		if (ev->status)
+		if (ev->status) {
 			mgmt_disconnect_failed(hdev, &conn->dst, conn->type,
 					       conn->dst_type, ev->status);
-		else
+		} else {
+			u8 reason = hci_to_mgmt_reason(ev->reason);
+
 			mgmt_device_disconnected(hdev, &conn->dst, conn->type,
-						 conn->dst_type);
+						 conn->dst_type, reason);
+		}
 	}
 
 	if (ev->status == 0) {
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index a3329cbd3e4d..05d4b83a0189 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -3077,16 +3077,17 @@ static void unpair_device_rsp(struct pending_cmd *cmd, void *data)
 }
 
 int mgmt_device_disconnected(struct hci_dev *hdev, bdaddr_t *bdaddr,
-			     u8 link_type, u8 addr_type)
+			     u8 link_type, u8 addr_type, u8 reason)
 {
-	struct mgmt_addr_info ev;
+	struct mgmt_ev_device_disconnected ev;
 	struct sock *sk = NULL;
 	int err;
 
 	mgmt_pending_foreach(MGMT_OP_DISCONNECT, hdev, disconnect_rsp, &sk);
 
-	bacpy(&ev.bdaddr, bdaddr);
-	ev.type = link_to_bdaddr(link_type, addr_type);
+	bacpy(&ev.addr.bdaddr, bdaddr);
+	ev.addr.type = link_to_bdaddr(link_type, addr_type);
+	ev.reason = reason;
 
 	err = mgmt_event(MGMT_EV_DEVICE_DISCONNECTED, hdev, &ev, sizeof(ev),
 			 sk);
-- 
cgit v1.2.3


From 144ad33020a0af66fbb188ef3f13ca91c5326a69 Mon Sep 17 00:00:00 2001
From: Syam Sidhardhan <s.syam@samsung.com>
Date: Fri, 27 Jul 2012 23:51:21 +0530
Subject: Bluetooth: Use kref for l2cap channel reference counting

This patch changes the struct l2cap_chan and associated code to use
kref api for object refcounting and freeing.

Suggested-by: Andrei Emeltchenko <andrei.emeltchenko@intel.com>
Signed-off-by: Jaganath Kanakkassery <jaganath.k@samsung.com>
Signed-off-by: Syam Sidhardhan <s.syam@samsung.com>
Signed-off-by: Gustavo Padovan <gustavo.padovan@collabora.co.uk>
---
 include/net/bluetooth/l2cap.h |  3 +--
 net/bluetooth/l2cap_core.c    | 15 ++++++++-------
 2 files changed, 9 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h
index d206296137e2..7ed8e356425a 100644
--- a/include/net/bluetooth/l2cap.h
+++ b/include/net/bluetooth/l2cap.h
@@ -433,11 +433,10 @@ struct l2cap_chan {
 	struct sock *sk;
 
 	struct l2cap_conn	*conn;
+	struct kref	kref;
 
 	__u8		state;
 
-	atomic_t	refcnt;
-
 	__le16		psm;
 	__u16		dcid;
 	__u16		scid;
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index dae895e3ca75..9732f03cfbef 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -406,7 +406,7 @@ struct l2cap_chan *l2cap_chan_create(void)
 
 	chan->state = BT_OPEN;
 
-	atomic_set(&chan->refcnt, 1);
+	kref_init(&chan->kref);
 
 	/* This flag is cleared in l2cap_chan_ready() */
 	set_bit(CONF_NOT_COMPLETE, &chan->conf_state);
@@ -416,8 +416,10 @@ struct l2cap_chan *l2cap_chan_create(void)
 	return chan;
 }
 
-static void l2cap_chan_destroy(struct l2cap_chan *chan)
+static void l2cap_chan_destroy(struct kref *kref)
 {
+	struct l2cap_chan *chan = container_of(kref, struct l2cap_chan, kref);
+
 	BT_DBG("chan %p", chan);
 
 	write_lock(&chan_list_lock);
@@ -429,17 +431,16 @@ static void l2cap_chan_destroy(struct l2cap_chan *chan)
 
 void l2cap_chan_hold(struct l2cap_chan *c)
 {
-	BT_DBG("chan %p orig refcnt %d", c, atomic_read(&c->refcnt));
+	BT_DBG("chan %p orig refcnt %d", c, atomic_read(&c->kref.refcount));
 
-	atomic_inc(&c->refcnt);
+	kref_get(&c->kref);
 }
 
 void l2cap_chan_put(struct l2cap_chan *c)
 {
-	BT_DBG("chan %p orig refcnt %d", c, atomic_read(&c->refcnt));
+	BT_DBG("chan %p orig refcnt %d", c, atomic_read(&c->kref.refcount));
 
-	if (atomic_dec_and_test(&c->refcnt))
-		l2cap_chan_destroy(c);
+	kref_put(&c->kref, l2cap_chan_destroy);
 }
 
 void l2cap_chan_set_defaults(struct l2cap_chan *chan)
-- 
cgit v1.2.3


From 06d7de831dab8b93adb86e039a2f3d36604a9197 Mon Sep 17 00:00:00 2001
From: AceLan Kao <acelan.kao@canonical.com>
Date: Thu, 26 Jul 2012 09:51:08 +0800
Subject: Revert "rfkill: remove dead code"

This reverts commit 2e48928d8a0f38c1b5c81eb3f1294de8a6382c68.

Those functions are needed and should not be removed, or
there is no way to set the rfkill led trigger name.

Signed-off-by: AceLan Kao <acelan.kao@canonical.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/linux/rfkill.h | 31 +++++++++++++++++++++++++++++++
 net/rfkill/core.c      | 14 ++++++++++++++
 2 files changed, 45 insertions(+)

(limited to 'include')

diff --git a/include/linux/rfkill.h b/include/linux/rfkill.h
index 6fdf02737e9d..0ec590bb3611 100644
--- a/include/linux/rfkill.h
+++ b/include/linux/rfkill.h
@@ -354,6 +354,37 @@ static inline bool rfkill_blocked(struct rfkill *rfkill)
 }
 #endif /* RFKILL || RFKILL_MODULE */
 
+
+#ifdef CONFIG_RFKILL_LEDS
+/**
+ * rfkill_get_led_trigger_name - Get the LED trigger name for the button's LED.
+ * This function might return a NULL pointer if registering of the
+ * LED trigger failed. Use this as "default_trigger" for the LED.
+ */
+const char *rfkill_get_led_trigger_name(struct rfkill *rfkill);
+
+/**
+ * rfkill_set_led_trigger_name -- set the LED trigger name
+ * @rfkill: rfkill struct
+ * @name: LED trigger name
+ *
+ * This function sets the LED trigger name of the radio LED
+ * trigger that rfkill creates. It is optional, but if called
+ * must be called before rfkill_register() to be effective.
+ */
+void rfkill_set_led_trigger_name(struct rfkill *rfkill, const char *name);
+#else
+static inline const char *rfkill_get_led_trigger_name(struct rfkill *rfkill)
+{
+	return NULL;
+}
+
+static inline void
+rfkill_set_led_trigger_name(struct rfkill *rfkill, const char *name)
+{
+}
+#endif
+
 #endif /* __KERNEL__ */
 
 #endif /* RFKILL_H */
diff --git a/net/rfkill/core.c b/net/rfkill/core.c
index 752b72360ebc..c275bad12068 100644
--- a/net/rfkill/core.c
+++ b/net/rfkill/core.c
@@ -150,6 +150,20 @@ static void rfkill_led_trigger_activate(struct led_classdev *led)
 	rfkill_led_trigger_event(rfkill);
 }
 
+const char *rfkill_get_led_trigger_name(struct rfkill *rfkill)
+{
+	return rfkill->led_trigger.name;
+}
+EXPORT_SYMBOL(rfkill_get_led_trigger_name);
+
+void rfkill_set_led_trigger_name(struct rfkill *rfkill, const char *name)
+{
+	BUG_ON(!rfkill);
+
+	rfkill->ledtrigname = name;
+}
+EXPORT_SYMBOL(rfkill_set_led_trigger_name);
+
 static int rfkill_led_trigger_register(struct rfkill *rfkill)
 {
 	rfkill->led_trigger.name = rfkill->ledtrigname
-- 
cgit v1.2.3


From d57ef3a6a2eeb88df47e892c66692e3f59722ffe Mon Sep 17 00:00:00 2001
From: Rafał Miłecki <zajec5@gmail.com>
Date: Fri, 10 Aug 2012 21:23:53 +0200
Subject: bcma: detect and register serial flash device
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Rafał Miłecki <zajec5@gmail.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/bcma/Kconfig                        |   2 +-
 drivers/bcma/bcma_private.h                 |   1 +
 drivers/bcma/driver_chipcommon_sflash.c     | 123 +++++++++++++++++++++++++++-
 drivers/bcma/main.c                         |   9 ++
 include/linux/bcma/bcma_driver_chipcommon.h |  13 +++
 include/linux/bcma/bcma_regs.h              |   2 +
 6 files changed, 146 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/drivers/bcma/Kconfig b/drivers/bcma/Kconfig
index 06b3207adebd..cf6e4dd22fd2 100644
--- a/drivers/bcma/Kconfig
+++ b/drivers/bcma/Kconfig
@@ -48,7 +48,7 @@ config BCMA_DRIVER_MIPS
 
 config BCMA_SFLASH
 	bool
-	depends on BCMA_DRIVER_MIPS && BROKEN
+	depends on BCMA_DRIVER_MIPS
 	default y
 
 config BCMA_NFLASH
diff --git a/drivers/bcma/bcma_private.h b/drivers/bcma/bcma_private.h
index 3cf9cc923cd2..94bf07c54a43 100644
--- a/drivers/bcma/bcma_private.h
+++ b/drivers/bcma/bcma_private.h
@@ -54,6 +54,7 @@ u32 bcma_pmu_get_clockcpu(struct bcma_drv_cc *cc);
 #ifdef CONFIG_BCMA_SFLASH
 /* driver_chipcommon_sflash.c */
 int bcma_sflash_init(struct bcma_drv_cc *cc);
+extern struct platform_device bcma_sflash_dev;
 #else
 static inline int bcma_sflash_init(struct bcma_drv_cc *cc)
 {
diff --git a/drivers/bcma/driver_chipcommon_sflash.c b/drivers/bcma/driver_chipcommon_sflash.c
index 6e157a58a1d7..2c4eec2ca5a0 100644
--- a/drivers/bcma/driver_chipcommon_sflash.c
+++ b/drivers/bcma/driver_chipcommon_sflash.c
@@ -5,15 +5,132 @@
  * Licensed under the GNU/GPL. See COPYING for details.
  */
 
+#include <linux/platform_device.h>
 #include <linux/bcma/bcma.h>
-#include <linux/bcma/bcma_driver_chipcommon.h>
-#include <linux/delay.h>
 
 #include "bcma_private.h"
 
+static struct resource bcma_sflash_resource = {
+	.name	= "bcma_sflash",
+	.start	= BCMA_SFLASH,
+	.end	= 0,
+	.flags  = IORESOURCE_MEM | IORESOURCE_READONLY,
+};
+
+struct platform_device bcma_sflash_dev = {
+	.name		= "bcma_sflash",
+	.resource	= &bcma_sflash_resource,
+	.num_resources	= 1,
+};
+
+struct bcma_sflash_tbl_e {
+	char *name;
+	u32 id;
+	u32 blocksize;
+	u16 numblocks;
+};
+
+static struct bcma_sflash_tbl_e bcma_sflash_st_tbl[] = {
+	{ "", 0x14, 0x10000, 32, },
+	{ 0 },
+};
+
+static struct bcma_sflash_tbl_e bcma_sflash_sst_tbl[] = {
+	{ 0 },
+};
+
+static struct bcma_sflash_tbl_e bcma_sflash_at_tbl[] = {
+	{ 0 },
+};
+
+static void bcma_sflash_cmd(struct bcma_drv_cc *cc, u32 opcode)
+{
+	int i;
+	bcma_cc_write32(cc, BCMA_CC_FLASHCTL,
+			BCMA_CC_FLASHCTL_START | opcode);
+	for (i = 0; i < 1000; i++) {
+		if (!(bcma_cc_read32(cc, BCMA_CC_FLASHCTL) &
+		      BCMA_CC_FLASHCTL_BUSY))
+			return;
+		cpu_relax();
+	}
+	bcma_err(cc->core->bus, "SFLASH control command failed (timeout)!\n");
+}
+
 /* Initialize serial flash access */
 int bcma_sflash_init(struct bcma_drv_cc *cc)
 {
-	bcma_err(cc->core->bus, "Serial flash support is broken\n");
+	struct bcma_bus *bus = cc->core->bus;
+	struct bcma_sflash *sflash = &cc->sflash;
+	struct bcma_sflash_tbl_e *e;
+	u32 id, id2;
+
+	switch (cc->capabilities & BCMA_CC_CAP_FLASHT) {
+	case BCMA_CC_FLASHT_STSER:
+		bcma_sflash_cmd(cc, BCMA_CC_FLASHCTL_ST_DP);
+
+		bcma_cc_write32(cc, BCMA_CC_FLASHADDR, 0);
+		bcma_sflash_cmd(cc, BCMA_CC_FLASHCTL_ST_RES);
+		id = bcma_cc_read32(cc, BCMA_CC_FLASHDATA);
+
+		bcma_cc_write32(cc, BCMA_CC_FLASHADDR, 1);
+		bcma_sflash_cmd(cc, BCMA_CC_FLASHCTL_ST_RES);
+		id2 = bcma_cc_read32(cc, BCMA_CC_FLASHDATA);
+
+		switch (id) {
+		case 0xbf:
+			for (e = bcma_sflash_sst_tbl; e->name; e++) {
+				if (e->id == id2)
+					break;
+			}
+			break;
+		default:
+			for (e = bcma_sflash_st_tbl; e->name; e++) {
+				if (e->id == id)
+					break;
+			}
+			break;
+		}
+		if (!e->name) {
+			bcma_err(bus, "Unsupported ST serial flash (id: 0x%X, id2: 0x%X)\n", id, id2);
+			return -ENOTSUPP;
+		}
+
+		break;
+	case BCMA_CC_FLASHT_ATSER:
+		bcma_sflash_cmd(cc, BCMA_CC_FLASHCTL_AT_STATUS);
+		id = bcma_cc_read32(cc, BCMA_CC_FLASHDATA) & 0x3c;
+
+		for (e = bcma_sflash_at_tbl; e->name; e++) {
+			if (e->id == id)
+				break;
+		}
+		if (!e->name) {
+			bcma_err(bus, "Unsupported Atmel serial flash (id: 0x%X)\n", id);
+			return -ENOTSUPP;
+		}
+
+		break;
+	default:
+		bcma_err(bus, "Unsupported flash type\n");
+		return -ENOTSUPP;
+	}
+
+	sflash->window = BCMA_SFLASH;
+	sflash->blocksize = e->blocksize;
+	sflash->numblocks = e->numblocks;
+	sflash->size = sflash->blocksize * sflash->numblocks;
+	sflash->present = true;
+
+	bcma_info(bus, "Found %s serial flash (size: %dKiB, blocksize: 0x%X, blocks: %d)\n",
+		  e->name, sflash->size / 1024, sflash->blocksize,
+		  sflash->numblocks);
+
+	/* Prepare platform device, but don't register it yet. It's too early,
+	 * malloc (required by device_private_init) is not available yet. */
+	bcma_sflash_dev.resource[0].end = bcma_sflash_dev.resource[0].start +
+					  sflash->size;
+	bcma_sflash_dev.dev.platform_data = sflash;
+
 	return 0;
 }
diff --git a/drivers/bcma/main.c b/drivers/bcma/main.c
index 758af9ccdef0..a501d259287c 100644
--- a/drivers/bcma/main.c
+++ b/drivers/bcma/main.c
@@ -7,6 +7,7 @@
 
 #include "bcma_private.h"
 #include <linux/module.h>
+#include <linux/platform_device.h>
 #include <linux/bcma/bcma.h>
 #include <linux/slab.h>
 
@@ -136,6 +137,14 @@ static int bcma_register_cores(struct bcma_bus *bus)
 		dev_id++;
 	}
 
+#ifdef CONFIG_BCMA_SFLASH
+	if (bus->drv_cc.sflash.present) {
+		err = platform_device_register(&bcma_sflash_dev);
+		if (err)
+			bcma_err(bus, "Error registering serial flash\n");
+	}
+#endif
+
 	return 0;
 }
 
diff --git a/include/linux/bcma/bcma_driver_chipcommon.h b/include/linux/bcma/bcma_driver_chipcommon.h
index fcb06fb284eb..bed89694c5f9 100644
--- a/include/linux/bcma/bcma_driver_chipcommon.h
+++ b/include/linux/bcma/bcma_driver_chipcommon.h
@@ -509,6 +509,16 @@ struct bcma_pflash {
 	u32 window_size;
 };
 
+#ifdef CONFIG_BCMA_SFLASH
+struct bcma_sflash {
+	bool present;
+	u32 window;
+	u32 blocksize;
+	u16 numblocks;
+	u32 size;
+};
+#endif
+
 struct bcma_serial_port {
 	void *regs;
 	unsigned long clockspeed;
@@ -529,6 +539,9 @@ struct bcma_drv_cc {
 	struct bcma_chipcommon_pmu pmu;
 #ifdef CONFIG_BCMA_DRIVER_MIPS
 	struct bcma_pflash pflash;
+#ifdef CONFIG_BCMA_SFLASH
+	struct bcma_sflash sflash;
+#endif
 
 	int nr_serial_ports;
 	struct bcma_serial_port serial_ports[4];
diff --git a/include/linux/bcma/bcma_regs.h b/include/linux/bcma/bcma_regs.h
index a393e82bf7bf..6c9cb93ae3de 100644
--- a/include/linux/bcma/bcma_regs.h
+++ b/include/linux/bcma/bcma_regs.h
@@ -85,4 +85,6 @@
 							 * (2 ZettaBytes), high 32 bits
 							 */
 
+#define BCMA_SFLASH			0x1c000000
+
 #endif /* LINUX_BCMA_REGS_H_ */
-- 
cgit v1.2.3


From 371a00448f95adaa612cf1a0b31a11e7093bc706 Mon Sep 17 00:00:00 2001
From: Rafał Miłecki <zajec5@gmail.com>
Date: Sun, 12 Aug 2012 13:08:05 +0200
Subject: bcma: detect and register NAND flash device
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Rafał Miłecki <zajec5@gmail.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/bcma/Kconfig                        |  2 +-
 drivers/bcma/bcma_private.h                 |  1 +
 drivers/bcma/driver_chipcommon_nflash.c     | 28 +++++++++++++++++++++++++---
 drivers/bcma/main.c                         |  8 ++++++++
 include/linux/bcma/bcma_driver_chipcommon.h | 13 +++++++++++++
 5 files changed, 48 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/drivers/bcma/Kconfig b/drivers/bcma/Kconfig
index cf6e4dd22fd2..a533af218368 100644
--- a/drivers/bcma/Kconfig
+++ b/drivers/bcma/Kconfig
@@ -53,7 +53,7 @@ config BCMA_SFLASH
 
 config BCMA_NFLASH
 	bool
-	depends on BCMA_DRIVER_MIPS && BROKEN
+	depends on BCMA_DRIVER_MIPS
 	default y
 
 config BCMA_DRIVER_GMAC_CMN
diff --git a/drivers/bcma/bcma_private.h b/drivers/bcma/bcma_private.h
index 94bf07c54a43..169fc58427d3 100644
--- a/drivers/bcma/bcma_private.h
+++ b/drivers/bcma/bcma_private.h
@@ -66,6 +66,7 @@ static inline int bcma_sflash_init(struct bcma_drv_cc *cc)
 #ifdef CONFIG_BCMA_NFLASH
 /* driver_chipcommon_nflash.c */
 int bcma_nflash_init(struct bcma_drv_cc *cc);
+extern struct platform_device bcma_nflash_dev;
 #else
 static inline int bcma_nflash_init(struct bcma_drv_cc *cc)
 {
diff --git a/drivers/bcma/driver_chipcommon_nflash.c b/drivers/bcma/driver_chipcommon_nflash.c
index 574d62435bc2..9042781edec3 100644
--- a/drivers/bcma/driver_chipcommon_nflash.c
+++ b/drivers/bcma/driver_chipcommon_nflash.c
@@ -5,15 +5,37 @@
  * Licensed under the GNU/GPL. See COPYING for details.
  */
 
+#include <linux/platform_device.h>
 #include <linux/bcma/bcma.h>
-#include <linux/bcma/bcma_driver_chipcommon.h>
-#include <linux/delay.h>
 
 #include "bcma_private.h"
 
+struct platform_device bcma_nflash_dev = {
+	.name		= "bcma_nflash",
+	.num_resources	= 0,
+};
+
 /* Initialize NAND flash access */
 int bcma_nflash_init(struct bcma_drv_cc *cc)
 {
-	bcma_err(cc->core->bus, "NAND flash support is broken\n");
+	struct bcma_bus *bus = cc->core->bus;
+
+	if (bus->chipinfo.id != BCMA_CHIP_ID_BCM4706 &&
+	    cc->core->id.rev != 0x38) {
+		bcma_err(bus, "NAND flash on unsupported board!\n");
+		return -ENOTSUPP;
+	}
+
+	if (!(cc->capabilities & BCMA_CC_CAP_NFLASH)) {
+		bcma_err(bus, "NAND flash not present according to ChipCommon\n");
+		return -ENODEV;
+	}
+
+	cc->nflash.present = true;
+
+	/* Prepare platform device, but don't register it yet. It's too early,
+	 * malloc (required by device_private_init) is not available yet. */
+	bcma_nflash_dev.dev.platform_data = &cc->nflash;
+
 	return 0;
 }
diff --git a/drivers/bcma/main.c b/drivers/bcma/main.c
index a501d259287c..a8f570d69075 100644
--- a/drivers/bcma/main.c
+++ b/drivers/bcma/main.c
@@ -145,6 +145,14 @@ static int bcma_register_cores(struct bcma_bus *bus)
 	}
 #endif
 
+#ifdef CONFIG_BCMA_NFLASH
+	if (bus->drv_cc.nflash.present) {
+		err = platform_device_register(&bcma_nflash_dev);
+		if (err)
+			bcma_err(bus, "Error registering NAND flash\n");
+	}
+#endif
+
 	return 0;
 }
 
diff --git a/include/linux/bcma/bcma_driver_chipcommon.h b/include/linux/bcma/bcma_driver_chipcommon.h
index bed89694c5f9..9810d4b29abf 100644
--- a/include/linux/bcma/bcma_driver_chipcommon.h
+++ b/include/linux/bcma/bcma_driver_chipcommon.h
@@ -519,6 +519,16 @@ struct bcma_sflash {
 };
 #endif
 
+#ifdef CONFIG_BCMA_NFLASH
+struct mtd_info;
+
+struct bcma_nflash {
+	bool present;
+
+	struct mtd_info *mtd;
+};
+#endif
+
 struct bcma_serial_port {
 	void *regs;
 	unsigned long clockspeed;
@@ -542,6 +552,9 @@ struct bcma_drv_cc {
 #ifdef CONFIG_BCMA_SFLASH
 	struct bcma_sflash sflash;
 #endif
+#ifdef CONFIG_BCMA_NFLASH
+	struct bcma_nflash nflash;
+#endif
 
 	int nr_serial_ports;
 	struct bcma_serial_port serial_ports[4];
-- 
cgit v1.2.3


From 0115e8e30d6fcdd4b8faa30d3ffd90859a591f51 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 22 Aug 2012 17:19:46 +0000
Subject: net: remove delay at device dismantle

I noticed extra one second delay in device dismantle, tracked down to
a call to dst_dev_event() while some call_rcu() are still in RCU queues.

These call_rcu() were posted by rt_free(struct rtable *rt) calls.

We then wait a little (but one second) in netdev_wait_allrefs() before
kicking again NETDEV_UNREGISTER.

As the call_rcu() are now completed, dst_dev_event() can do the needed
device swap on busy dst.

To solve this problem, add a new NETDEV_UNREGISTER_FINAL, called
after a rcu_barrier(), but outside of RTNL lock.

Use NETDEV_UNREGISTER_FINAL with care !

Change dst_dev_event() handler to react to NETDEV_UNREGISTER_FINAL

Also remove NETDEV_UNREGISTER_BATCH, as its not used anymore after
IP cache removal.

With help from Gao feng

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Tom Herbert <therbert@google.com>
Cc: Mahesh Bandewar <maheshb@google.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Gao feng <gaofeng@cn.fujitsu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  2 +-
 net/core/dev.c            | 22 ++++++++--------------
 net/core/dst.c            |  2 +-
 net/core/fib_rules.c      |  3 ++-
 net/core/rtnetlink.c      |  2 +-
 net/ipv4/devinet.c        |  6 +++++-
 net/ipv4/fib_frontend.c   |  8 ++++----
 net/ipv6/addrconf.c       |  6 +++++-
 8 files changed, 27 insertions(+), 24 deletions(-)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 4936f09a9333..9ad7fa8c10e0 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1553,7 +1553,7 @@ struct packet_type {
 #define NETDEV_PRE_TYPE_CHANGE	0x000E
 #define NETDEV_POST_TYPE_CHANGE	0x000F
 #define NETDEV_POST_INIT	0x0010
-#define NETDEV_UNREGISTER_BATCH 0x0011
+#define NETDEV_UNREGISTER_FINAL 0x0011
 #define NETDEV_RELEASE		0x0012
 #define NETDEV_NOTIFY_PEERS	0x0013
 #define NETDEV_JOIN		0x0014
diff --git a/net/core/dev.c b/net/core/dev.c
index 088923fe4066..0640d2a859c6 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1406,7 +1406,6 @@ rollback:
 				nb->notifier_call(nb, NETDEV_DOWN, dev);
 			}
 			nb->notifier_call(nb, NETDEV_UNREGISTER, dev);
-			nb->notifier_call(nb, NETDEV_UNREGISTER_BATCH, dev);
 		}
 	}
 
@@ -1448,7 +1447,6 @@ int unregister_netdevice_notifier(struct notifier_block *nb)
 				nb->notifier_call(nb, NETDEV_DOWN, dev);
 			}
 			nb->notifier_call(nb, NETDEV_UNREGISTER, dev);
-			nb->notifier_call(nb, NETDEV_UNREGISTER_BATCH, dev);
 		}
 	}
 unlock:
@@ -1468,7 +1466,8 @@ EXPORT_SYMBOL(unregister_netdevice_notifier);
 
 int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
 {
-	ASSERT_RTNL();
+	if (val != NETDEV_UNREGISTER_FINAL)
+		ASSERT_RTNL();
 	return raw_notifier_call_chain(&netdev_chain, val, dev);
 }
 EXPORT_SYMBOL(call_netdevice_notifiers);
@@ -5331,10 +5330,6 @@ static void rollback_registered_many(struct list_head *head)
 		netdev_unregister_kobject(dev);
 	}
 
-	/* Process any work delayed until the end of the batch */
-	dev = list_first_entry(head, struct net_device, unreg_list);
-	call_netdevice_notifiers(NETDEV_UNREGISTER_BATCH, dev);
-
 	synchronize_net();
 
 	list_for_each_entry(dev, head, unreg_list)
@@ -5787,9 +5782,8 @@ static void netdev_wait_allrefs(struct net_device *dev)
 
 			/* Rebroadcast unregister notification */
 			call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
-			/* don't resend NETDEV_UNREGISTER_BATCH, _BATCH users
-			 * should have already handle it the first time */
-
+			rcu_barrier();
+			call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev);
 			if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
 				     &dev->state)) {
 				/* We must not have linkwatch events
@@ -5851,9 +5845,8 @@ void netdev_run_todo(void)
 
 	__rtnl_unlock();
 
-	/* Wait for rcu callbacks to finish before attempting to drain
-	 * the device list.  This usually avoids a 250ms wait.
-	 */
+
+	/* Wait for rcu callbacks to finish before next phase */
 	if (!list_empty(&list))
 		rcu_barrier();
 
@@ -5862,6 +5855,8 @@ void netdev_run_todo(void)
 			= list_first_entry(&list, struct net_device, todo_list);
 		list_del(&dev->todo_list);
 
+		call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev);
+
 		if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) {
 			pr_err("network todo '%s' but state %d\n",
 			       dev->name, dev->reg_state);
@@ -6256,7 +6251,6 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
 	   the device is just moving and can keep their slaves up.
 	*/
 	call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
-	call_netdevice_notifiers(NETDEV_UNREGISTER_BATCH, dev);
 	rtmsg_ifinfo(RTM_DELLINK, dev, ~0U);
 
 	/*
diff --git a/net/core/dst.c b/net/core/dst.c
index 56d63612e1e4..f6593d238e9a 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -374,7 +374,7 @@ static int dst_dev_event(struct notifier_block *this, unsigned long event,
 	struct dst_entry *dst, *last = NULL;
 
 	switch (event) {
-	case NETDEV_UNREGISTER:
+	case NETDEV_UNREGISTER_FINAL:
 	case NETDEV_DOWN:
 		mutex_lock(&dst_gc_mutex);
 		for (dst = dst_busy_list; dst; dst = dst->next) {
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index ab7db83236c9..585093755c23 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -711,15 +711,16 @@ static int fib_rules_event(struct notifier_block *this, unsigned long event,
 	struct net *net = dev_net(dev);
 	struct fib_rules_ops *ops;
 
-	ASSERT_RTNL();
 
 	switch (event) {
 	case NETDEV_REGISTER:
+		ASSERT_RTNL();
 		list_for_each_entry(ops, &net->rules_ops, list)
 			attach_rules(&ops->rules_list, dev);
 		break;
 
 	case NETDEV_UNREGISTER:
+		ASSERT_RTNL();
 		list_for_each_entry(ops, &net->rules_ops, list)
 			detach_rules(&ops->rules_list, dev);
 		break;
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 34d975b0f277..c64efcff8078 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -2358,7 +2358,7 @@ static int rtnetlink_event(struct notifier_block *this, unsigned long event, voi
 	case NETDEV_PRE_TYPE_CHANGE:
 	case NETDEV_GOING_DOWN:
 	case NETDEV_UNREGISTER:
-	case NETDEV_UNREGISTER_BATCH:
+	case NETDEV_UNREGISTER_FINAL:
 	case NETDEV_RELEASE:
 	case NETDEV_JOIN:
 		break;
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index adf273f8ad2e..6a5e6e4b142c 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1147,8 +1147,12 @@ static int inetdev_event(struct notifier_block *this, unsigned long event,
 			 void *ptr)
 {
 	struct net_device *dev = ptr;
-	struct in_device *in_dev = __in_dev_get_rtnl(dev);
+	struct in_device *in_dev;
 
+	if (event == NETDEV_UNREGISTER_FINAL)
+		goto out;
+
+	in_dev = __in_dev_get_rtnl(dev);
 	ASSERT_RTNL();
 
 	if (!in_dev) {
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 7f073a38c87d..fd7d9ae64f16 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -1041,7 +1041,7 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event,
 static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
 {
 	struct net_device *dev = ptr;
-	struct in_device *in_dev = __in_dev_get_rtnl(dev);
+	struct in_device *in_dev;
 	struct net *net = dev_net(dev);
 
 	if (event == NETDEV_UNREGISTER) {
@@ -1050,9 +1050,11 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo
 		return NOTIFY_DONE;
 	}
 
-	if (!in_dev)
+	if (event == NETDEV_UNREGISTER_FINAL)
 		return NOTIFY_DONE;
 
+	in_dev = __in_dev_get_rtnl(dev);
+
 	switch (event) {
 	case NETDEV_UP:
 		for_ifa(in_dev) {
@@ -1071,8 +1073,6 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo
 	case NETDEV_CHANGE:
 		rt_cache_flush(dev_net(dev), 0);
 		break;
-	case NETDEV_UNREGISTER_BATCH:
-		break;
 	}
 	return NOTIFY_DONE;
 }
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 6bc85f7c31e3..e581009cb09e 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -2566,10 +2566,14 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
 			   void *data)
 {
 	struct net_device *dev = (struct net_device *) data;
-	struct inet6_dev *idev = __in6_dev_get(dev);
+	struct inet6_dev *idev;
 	int run_pending = 0;
 	int err;
 
+	if (event == NETDEV_UNREGISTER_FINAL)
+		return NOTIFY_DONE;
+
+	idev = __in6_dev_get(dev);
 	switch (event) {
 	case NETDEV_REGISTER:
 		if (!idev && dev->mtu >= IPV6_MIN_MTU) {
-- 
cgit v1.2.3


From b32607dd47d456b99f0a16f1c37bc8a0975ffb19 Mon Sep 17 00:00:00 2001
From: "Allan, Bruce W" <bruce.w.allan@intel.com>
Date: Mon, 20 Aug 2012 04:55:29 +0000
Subject: mdio: translation of MMD EEE registers to/from ethtool settings

The helper functions which translate IEEE MDIO Manageable Device (MMD)
Energy-Efficient Ethernet (EEE) registers 3.20, 7.60 and 7.61 to and from
the comparable ethtool supported/advertised settings will be needed by
drivers other than those in PHYLIB (e.g. e1000e in a follow-on patch).

In the same fashion as similar translation functions in linux/mii.h, move
these functions from the PHYLIB core to the linux/mdio.h header file so the
code will not have to be duplicated in each driver needing MMD-to-ethtool
(and vice-versa) translations.  The function and some variable names have
been renamed to be more descriptive.

Not tested on the only hardware that currently calls the related functions,
stmmac, because I don't have access to any.  Has been compile tested and
the translations have been tested on a locally modified version of e1000e.

Signed-off-by: Bruce Allan <bruce.w.allan@intel.com>
Cc: Giuseppe Cavallaro <peppe.cavallaro@st.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/phy.c | 74 +++++----------------------------------------
 include/linux/mdio.h  | 83 +++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 90 insertions(+), 67 deletions(-)

(limited to 'include')

diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c
index 7ca2ff97c368..ef9ea9248223 100644
--- a/drivers/net/phy/phy.c
+++ b/drivers/net/phy/phy.c
@@ -1035,66 +1035,6 @@ static void phy_write_mmd_indirect(struct mii_bus *bus, int prtad, int devad,
 	bus->write(bus, addr, MII_MMD_DATA, data);
 }
 
-static u32 phy_eee_to_adv(u16 eee_adv)
-{
-	u32 adv = 0;
-
-	if (eee_adv & MDIO_EEE_100TX)
-		adv |= ADVERTISED_100baseT_Full;
-	if (eee_adv & MDIO_EEE_1000T)
-		adv |= ADVERTISED_1000baseT_Full;
-	if (eee_adv & MDIO_EEE_10GT)
-		adv |= ADVERTISED_10000baseT_Full;
-	if (eee_adv & MDIO_EEE_1000KX)
-		adv |= ADVERTISED_1000baseKX_Full;
-	if (eee_adv & MDIO_EEE_10GKX4)
-		adv |= ADVERTISED_10000baseKX4_Full;
-	if (eee_adv & MDIO_EEE_10GKR)
-		adv |= ADVERTISED_10000baseKR_Full;
-
-	return adv;
-}
-
-static u32 phy_eee_to_supported(u16 eee_caported)
-{
-	u32 supported = 0;
-
-	if (eee_caported & MDIO_EEE_100TX)
-		supported |= SUPPORTED_100baseT_Full;
-	if (eee_caported & MDIO_EEE_1000T)
-		supported |= SUPPORTED_1000baseT_Full;
-	if (eee_caported & MDIO_EEE_10GT)
-		supported |= SUPPORTED_10000baseT_Full;
-	if (eee_caported & MDIO_EEE_1000KX)
-		supported |= SUPPORTED_1000baseKX_Full;
-	if (eee_caported & MDIO_EEE_10GKX4)
-		supported |= SUPPORTED_10000baseKX4_Full;
-	if (eee_caported & MDIO_EEE_10GKR)
-		supported |= SUPPORTED_10000baseKR_Full;
-
-	return supported;
-}
-
-static u16 phy_adv_to_eee(u32 adv)
-{
-	u16 reg = 0;
-
-	if (adv & ADVERTISED_100baseT_Full)
-		reg |= MDIO_EEE_100TX;
-	if (adv & ADVERTISED_1000baseT_Full)
-		reg |= MDIO_EEE_1000T;
-	if (adv & ADVERTISED_10000baseT_Full)
-		reg |= MDIO_EEE_10GT;
-	if (adv & ADVERTISED_1000baseKX_Full)
-		reg |= MDIO_EEE_1000KX;
-	if (adv & ADVERTISED_10000baseKX4_Full)
-		reg |= MDIO_EEE_10GKX4;
-	if (adv & ADVERTISED_10000baseKR_Full)
-		reg |= MDIO_EEE_10GKR;
-
-	return reg;
-}
-
 /**
  * phy_init_eee - init and check the EEE feature
  * @phydev: target phy_device struct
@@ -1132,7 +1072,7 @@ int phy_init_eee(struct phy_device *phydev, bool clk_stop_enable)
 		if (eee_cap < 0)
 			return eee_cap;
 
-		cap = phy_eee_to_supported(eee_cap);
+		cap = mmd_eee_cap_to_ethtool_sup_t(eee_cap);
 		if (!cap)
 			goto eee_exit;
 
@@ -1149,8 +1089,8 @@ int phy_init_eee(struct phy_device *phydev, bool clk_stop_enable)
 		if (eee_adv < 0)
 			return eee_adv;
 
-		adv = phy_eee_to_adv(eee_adv);
-		lp = phy_eee_to_adv(eee_lp);
+		adv = mmd_eee_adv_to_ethtool_adv_t(eee_adv);
+		lp = mmd_eee_adv_to_ethtool_adv_t(eee_lp);
 		idx = phy_find_setting(phydev->speed, phydev->duplex);
 		if ((lp & adv & settings[idx].setting))
 			goto eee_exit;
@@ -1210,21 +1150,21 @@ int phy_ethtool_get_eee(struct phy_device *phydev, struct ethtool_eee *data)
 				    MDIO_MMD_PCS, phydev->addr);
 	if (val < 0)
 		return val;
-	data->supported = phy_eee_to_supported(val);
+	data->supported = mmd_eee_cap_to_ethtool_sup_t(val);
 
 	/* Get advertisement EEE */
 	val = phy_read_mmd_indirect(phydev->bus, MDIO_AN_EEE_ADV,
 				    MDIO_MMD_AN, phydev->addr);
 	if (val < 0)
 		return val;
-	data->advertised = phy_eee_to_adv(val);
+	data->advertised = mmd_eee_adv_to_ethtool_adv_t(val);
 
 	/* Get LP advertisement EEE */
 	val = phy_read_mmd_indirect(phydev->bus, MDIO_AN_EEE_LPABLE,
 				    MDIO_MMD_AN, phydev->addr);
 	if (val < 0)
 		return val;
-	data->lp_advertised = phy_eee_to_adv(val);
+	data->lp_advertised = mmd_eee_adv_to_ethtool_adv_t(val);
 
 	return 0;
 }
@@ -1241,7 +1181,7 @@ int phy_ethtool_set_eee(struct phy_device *phydev, struct ethtool_eee *data)
 {
 	int val;
 
-	val = phy_adv_to_eee(data->advertised);
+	val = ethtool_adv_to_mmd_eee_adv_t(data->advertised);
 	phy_write_mmd_indirect(phydev->bus, MDIO_AN_EEE_ADV, MDIO_MMD_AN,
 			       phydev->addr, val);
 
diff --git a/include/linux/mdio.h b/include/linux/mdio.h
index 7cccafe50e7b..6c406845f7e2 100644
--- a/include/linux/mdio.h
+++ b/include/linux/mdio.h
@@ -377,5 +377,88 @@ static inline void mdio45_ethtool_gset(const struct mdio_if_info *mdio,
 extern int mdio_mii_ioctl(const struct mdio_if_info *mdio,
 			  struct mii_ioctl_data *mii_data, int cmd);
 
+/**
+ * mmd_eee_cap_to_ethtool_sup_t
+ * @eee_cap: value of the MMD EEE Capability register
+ *
+ * A small helper function that translates MMD EEE Capability (3.20) bits
+ * to ethtool supported settings.
+ */
+static inline u32 mmd_eee_cap_to_ethtool_sup_t(u16 eee_cap)
+{
+	u32 supported = 0;
+
+	if (eee_cap & MDIO_EEE_100TX)
+		supported |= SUPPORTED_100baseT_Full;
+	if (eee_cap & MDIO_EEE_1000T)
+		supported |= SUPPORTED_1000baseT_Full;
+	if (eee_cap & MDIO_EEE_10GT)
+		supported |= SUPPORTED_10000baseT_Full;
+	if (eee_cap & MDIO_EEE_1000KX)
+		supported |= SUPPORTED_1000baseKX_Full;
+	if (eee_cap & MDIO_EEE_10GKX4)
+		supported |= SUPPORTED_10000baseKX4_Full;
+	if (eee_cap & MDIO_EEE_10GKR)
+		supported |= SUPPORTED_10000baseKR_Full;
+
+	return supported;
+}
+
+/**
+ * mmd_eee_adv_to_ethtool_adv_t
+ * @eee_adv: value of the MMD EEE Advertisement/Link Partner Ability registers
+ *
+ * A small helper function that translates the MMD EEE Advertisment (7.60)
+ * and MMD EEE Link Partner Ability (7.61) bits to ethtool advertisement
+ * settings.
+ */
+static inline u32 mmd_eee_adv_to_ethtool_adv_t(u16 eee_adv)
+{
+	u32 adv = 0;
+
+	if (eee_adv & MDIO_EEE_100TX)
+		adv |= ADVERTISED_100baseT_Full;
+	if (eee_adv & MDIO_EEE_1000T)
+		adv |= ADVERTISED_1000baseT_Full;
+	if (eee_adv & MDIO_EEE_10GT)
+		adv |= ADVERTISED_10000baseT_Full;
+	if (eee_adv & MDIO_EEE_1000KX)
+		adv |= ADVERTISED_1000baseKX_Full;
+	if (eee_adv & MDIO_EEE_10GKX4)
+		adv |= ADVERTISED_10000baseKX4_Full;
+	if (eee_adv & MDIO_EEE_10GKR)
+		adv |= ADVERTISED_10000baseKR_Full;
+
+	return adv;
+}
+
+/**
+ * ethtool_adv_to_mmd_eee_adv_t
+ * @adv: the ethtool advertisement settings
+ *
+ * A small helper function that translates ethtool advertisement settings
+ * to EEE advertisements for the MMD EEE Advertisement (7.60) and
+ * MMD EEE Link Partner Ability (7.61) registers.
+ */
+static inline u16 ethtool_adv_to_mmd_eee_adv_t(u32 adv)
+{
+	u16 reg = 0;
+
+	if (adv & ADVERTISED_100baseT_Full)
+		reg |= MDIO_EEE_100TX;
+	if (adv & ADVERTISED_1000baseT_Full)
+		reg |= MDIO_EEE_1000T;
+	if (adv & ADVERTISED_10000baseT_Full)
+		reg |= MDIO_EEE_10GT;
+	if (adv & ADVERTISED_1000baseKX_Full)
+		reg |= MDIO_EEE_1000KX;
+	if (adv & ADVERTISED_10000baseKX4_Full)
+		reg |= MDIO_EEE_10GKX4;
+	if (adv & ADVERTISED_10000baseKR_Full)
+		reg |= MDIO_EEE_10GKR;
+
+	return reg;
+}
+
 #endif /* __KERNEL__ */
 #endif /* __LINUX_MDIO_H__ */
-- 
cgit v1.2.3


From 0fa7fa98dbcc2789409ed24e885485e645803d7f Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@parallels.com>
Date: Tue, 21 Aug 2012 01:06:47 +0000
Subject: packet: Protect packet sk list with mutex (v2)

Change since v1:

* Fixed inuse counters access spotted by Eric

In patch eea68e2f (packet: Report socket mclist info via diag module) I've
introduced a "scheduling in atomic" problem in packet diag module -- the
socket list is traversed under rcu_read_lock() while performed under it sk
mclist access requires rtnl lock (i.e. -- mutex) to be taken.

[152363.820563] BUG: scheduling while atomic: crtools/12517/0x10000002
[152363.820573] 4 locks held by crtools/12517:
[152363.820581]  #0:  (sock_diag_mutex){+.+.+.}, at: [<ffffffff81a2dcb5>] sock_diag_rcv+0x1f/0x3e
[152363.820613]  #1:  (sock_diag_table_mutex){+.+.+.}, at: [<ffffffff81a2de70>] sock_diag_rcv_msg+0xdb/0x11a
[152363.820644]  #2:  (nlk->cb_mutex){+.+.+.}, at: [<ffffffff81a67d01>] netlink_dump+0x23/0x1ab
[152363.820693]  #3:  (rcu_read_lock){.+.+..}, at: [<ffffffff81b6a049>] packet_diag_dump+0x0/0x1af

Similar thing was then re-introduced by further packet diag patches (fanount
mutex and pgvec mutex for rings) :(

Apart from being terribly sorry for the above, I propose to change the packet
sk list protection from spinlock to mutex. This lock currently protects two
modifications:

* sklist
* prot inuse counters

The sklist modifications can be just reprotected with mutex since they already
occur in a sleeping context. The inuse counters modifications are trickier -- the
__this_cpu_-s are used inside, thus requiring the caller to handle the potential
issues with contexts himself. Since packet sockets' counters are modified in two
places only (packet_create and packet_release) we only need to protect the context
from being preempted. BH disabling is not required in this case.

Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netns/packet.h |  2 +-
 net/packet/af_packet.c     | 16 +++++++++++-----
 net/packet/diag.c          |  6 +++---
 3 files changed, 15 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/include/net/netns/packet.h b/include/net/netns/packet.h
index cb4e894c0f8d..4780b080a436 100644
--- a/include/net/netns/packet.h
+++ b/include/net/netns/packet.h
@@ -8,7 +8,7 @@
 #include <linux/spinlock.h>
 
 struct netns_packet {
-	spinlock_t		sklist_lock;
+	struct mutex		sklist_lock;
 	struct hlist_head	sklist;
 };
 
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index bfaa0a83f0eb..f220c5bdb71f 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -2317,10 +2317,13 @@ static int packet_release(struct socket *sock)
 	net = sock_net(sk);
 	po = pkt_sk(sk);
 
-	spin_lock_bh(&net->packet.sklist_lock);
+	mutex_lock(&net->packet.sklist_lock);
 	sk_del_node_init_rcu(sk);
+	mutex_unlock(&net->packet.sklist_lock);
+
+	preempt_disable();
 	sock_prot_inuse_add(net, sk->sk_prot, -1);
-	spin_unlock_bh(&net->packet.sklist_lock);
+	preempt_enable();
 
 	spin_lock(&po->bind_lock);
 	unregister_prot_hook(sk, false);
@@ -2519,10 +2522,13 @@ static int packet_create(struct net *net, struct socket *sock, int protocol,
 		register_prot_hook(sk);
 	}
 
-	spin_lock_bh(&net->packet.sklist_lock);
+	mutex_lock(&net->packet.sklist_lock);
 	sk_add_node_rcu(sk, &net->packet.sklist);
+	mutex_unlock(&net->packet.sklist_lock);
+
+	preempt_disable();
 	sock_prot_inuse_add(net, &packet_proto, 1);
-	spin_unlock_bh(&net->packet.sklist_lock);
+	preempt_enable();
 
 	return 0;
 out:
@@ -3775,7 +3781,7 @@ static const struct file_operations packet_seq_fops = {
 
 static int __net_init packet_net_init(struct net *net)
 {
-	spin_lock_init(&net->packet.sklist_lock);
+	mutex_init(&net->packet.sklist_lock);
 	INIT_HLIST_HEAD(&net->packet.sklist);
 
 	if (!proc_net_fops_create(net, "packet", 0, &packet_seq_fops))
diff --git a/net/packet/diag.c b/net/packet/diag.c
index bc33fbe8a5ef..39bce0d50df9 100644
--- a/net/packet/diag.c
+++ b/net/packet/diag.c
@@ -177,8 +177,8 @@ static int packet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
 	net = sock_net(skb->sk);
 	req = nlmsg_data(cb->nlh);
 
-	rcu_read_lock();
-	sk_for_each_rcu(sk, node, &net->packet.sklist) {
+	mutex_lock(&net->packet.sklist_lock);
+	sk_for_each(sk, node, &net->packet.sklist) {
 		if (!net_eq(sock_net(sk), net))
 			continue;
 		if (num < s_num)
@@ -192,7 +192,7 @@ next:
 		num++;
 	}
 done:
-	rcu_read_unlock();
+	mutex_unlock(&net->packet.sklist_lock);
 	cb->args[0] = num;
 
 	return skb->len;
-- 
cgit v1.2.3


From f63c45e0e63fd1bccb6d021fe4de20f82114a024 Mon Sep 17 00:00:00 2001
From: Rami Rosen <rosenr@marvell.com>
Date: Thu, 23 Aug 2012 02:55:41 +0000
Subject: packet: fix broken build.

This patch fixes a broken build due to a missing header:
...
  CC      net/ipv4/proc.o
In file included from include/net/net_namespace.h:15,
                 from net/ipv4/proc.c:35:
include/net/netns/packet.h:11: error: field 'sklist_lock' has incomplete type
...

The lock of netns_packet has been replaced by a recent patch to be a mutex instead of a spinlock,
but we need to replace the header file to be linux/mutex.h instead of linux/spinlock.h as well.

See commit 0fa7fa98dbcc2789409ed24e885485e645803d7f:
packet: Protect packet sk list with mutex (v2) patch,

Signed-off-by: Rami Rosen <rosenr@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netns/packet.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/netns/packet.h b/include/net/netns/packet.h
index 4780b080a436..17ec2b95c062 100644
--- a/include/net/netns/packet.h
+++ b/include/net/netns/packet.h
@@ -5,7 +5,7 @@
 #define __NETNS_PACKET_H__
 
 #include <linux/rculist.h>
-#include <linux/spinlock.h>
+#include <linux/mutex.h>
 
 struct netns_packet {
 	struct mutex		sklist_lock;
-- 
cgit v1.2.3


From 8f4cccbbd92f2ad0ddbbc498ef7cee2a1c3defe9 Mon Sep 17 00:00:00 2001
From: Ben Hutchings <bhutchings@solarflare.com>
Date: Mon, 20 Aug 2012 22:16:51 +0100
Subject: net: Set device operstate at registration time

The operstate of a device is initially IF_OPER_UNKNOWN and is updated
asynchronously by linkwatch after each change of carrier state
reported by the driver.  The default carrier state of a net device is
on, and this will never be changed on drivers that do not support
carrier detection, thus the operstate remains IF_OPER_UNKNOWN.

For devices that do support carrier detection, the driver must set the
carrier state to off initially, then poll the hardware state when the
device is opened.  However, we must not activate linkwatch for a
unregistered device, and commit b473001 ('net: Do not fire linkwatch
events until the device is registered.') ensured that we don't.  But
this means that the operstate for many devices that support carrier
detection remains IF_OPER_UNKNOWN when it should be IF_OPER_DOWN.

The same issue exists with the dormant state.

The proper initialisation sequence, avoiding a race with opening of
the device, is:

        rtnl_lock();
        rc = register_netdevice(dev);
        if (rc)
                goto out_unlock;
        netif_carrier_off(dev); /* or netif_dormant_on(dev) */
        rtnl_unlock();

but it seems silly that this should have to be repeated in so many
drivers.  Further, the operstate seen immediately after opening the
device may still be IF_OPER_UNKNOWN due to the asynchronous nature of
linkwatch.

Commit 22604c8 ('net: Fix for initial link state in 2.6.28') attempted
to fix this by setting the operstate synchronously, but it was
reverted as it could lead to deadlock.

This initialises the operstate synchronously at registration time
only.

Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 1 +
 net/core/dev.c            | 2 ++
 net/core/link_watch.c     | 8 ++++++++
 3 files changed, 11 insertions(+)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 9ad7fa8c10e0..ccac82e61604 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2227,6 +2227,7 @@ static inline void dev_hold(struct net_device *dev)
  * kind of lower layer not just hardware media.
  */
 
+extern void linkwatch_init_dev(struct net_device *dev);
 extern void linkwatch_fire_event(struct net_device *dev);
 extern void linkwatch_forget_dev(struct net_device *dev);
 
diff --git a/net/core/dev.c b/net/core/dev.c
index bc857fead8c8..2f25d0cac51c 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5648,6 +5648,8 @@ int register_netdevice(struct net_device *dev)
 
 	set_bit(__LINK_STATE_PRESENT, &dev->state);
 
+	linkwatch_init_dev(dev);
+
 	dev_init_scheduler(dev);
 	dev_hold(dev);
 	list_netdevice(dev);
diff --git a/net/core/link_watch.c b/net/core/link_watch.c
index c3519c6d1b16..a01922219a23 100644
--- a/net/core/link_watch.c
+++ b/net/core/link_watch.c
@@ -76,6 +76,14 @@ static void rfc2863_policy(struct net_device *dev)
 }
 
 
+void linkwatch_init_dev(struct net_device *dev)
+{
+	/* Handle pre-registration link state changes */
+	if (!netif_carrier_ok(dev) || netif_dormant(dev))
+		rfc2863_policy(dev);
+}
+
+
 static bool linkwatch_urgent_event(struct net_device *dev)
 {
 	if (!netif_running(dev))
-- 
cgit v1.2.3


From 9b361c13ceeae872161175d39f4798ee345ed10c Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@resnulli.us>
Date: Thu, 23 Aug 2012 03:26:52 +0000
Subject: vlan: add helper which can be called to see if device is used by vlan

also, remove unused vlan_info definition from header

CC: Patrick McHardy <kaber@trash.net>
Signed-off-by: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_vlan.h | 9 +++++++--
 net/8021q/vlan_core.c   | 6 ++++++
 2 files changed, 13 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h
index a810987cb80e..e6ff12dd717b 100644
--- a/include/linux/if_vlan.h
+++ b/include/linux/if_vlan.h
@@ -74,8 +74,6 @@ static inline struct vlan_ethhdr *vlan_eth_hdr(const struct sk_buff *skb)
 /* found in socket.c */
 extern void vlan_ioctl_set(int (*hook)(struct net *, void __user *));
 
-struct vlan_info;
-
 static inline int is_vlan_dev(struct net_device *dev)
 {
         return dev->priv_flags & IFF_802_1Q_VLAN;
@@ -101,6 +99,8 @@ extern int vlan_vids_add_by_dev(struct net_device *dev,
 				const struct net_device *by_dev);
 extern void vlan_vids_del_by_dev(struct net_device *dev,
 				 const struct net_device *by_dev);
+
+extern bool vlan_uses_dev(const struct net_device *dev);
 #else
 static inline struct net_device *
 __vlan_find_dev_deep(struct net_device *real_dev, u16 vlan_id)
@@ -151,6 +151,11 @@ static inline void vlan_vids_del_by_dev(struct net_device *dev,
 					const struct net_device *by_dev)
 {
 }
+
+static inline bool vlan_uses_dev(const struct net_device *dev)
+{
+	return false;
+}
 #endif
 
 /**
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index 8ca533c95de0..b258da88f675 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -368,3 +368,9 @@ void vlan_vids_del_by_dev(struct net_device *dev,
 		vlan_vid_del(dev, vid_info->vid);
 }
 EXPORT_SYMBOL(vlan_vids_del_by_dev);
+
+bool vlan_uses_dev(const struct net_device *dev)
+{
+	return rtnl_dereference(dev->vlan_info) ? true : false;
+}
+EXPORT_SYMBOL(vlan_uses_dev);
-- 
cgit v1.2.3


From 5f2d04f1f9b52604fca6ee08a77972c0df67e082 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Sun, 26 Aug 2012 19:13:55 +0200
Subject: ipv4: fix path MTU discovery with connection tracking

IPv4 conntrack defragments incoming packet at the PRE_ROUTING hook and
(in case of forwarded packets) refragments them at POST_ROUTING
independent of the IP_DF flag. Refragmentation uses the dst_mtu() of
the local route without caring about the original fragment sizes,
thereby breaking PMTUD.

This patch fixes this by keeping track of the largest received fragment
with IP_DF set and generates an ICMP fragmentation required error during
refragmentation if that size exceeds the MTU.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Acked-by: Eric Dumazet <edumazet@google.com>
Acked-by: David S. Miller <davem@davemloft.net>
---
 include/net/inet_frag.h | 2 ++
 include/net/ip.h        | 2 ++
 net/ipv4/ip_fragment.c  | 8 +++++++-
 net/ipv4/ip_output.c    | 4 +++-
 4 files changed, 14 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h
index 2431cf83aeca..5098ee7b7e0e 100644
--- a/include/net/inet_frag.h
+++ b/include/net/inet_frag.h
@@ -29,6 +29,8 @@ struct inet_frag_queue {
 #define INET_FRAG_COMPLETE	4
 #define INET_FRAG_FIRST_IN	2
 #define INET_FRAG_LAST_IN	1
+
+	u16			max_size;
 };
 
 #define INETFRAGS_HASHSZ		64
diff --git a/include/net/ip.h b/include/net/ip.h
index 5a5d84d3d2c6..0707fb9551aa 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -42,6 +42,8 @@ struct inet_skb_parm {
 #define IPSKB_XFRM_TRANSFORMED	4
 #define IPSKB_FRAG_COMPLETE	8
 #define IPSKB_REROUTED		16
+
+	u16			frag_max_size;
 };
 
 static inline unsigned int ip_hdrlen(const struct sk_buff *skb)
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 8d07c973409c..fa6a12c51066 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -523,6 +523,10 @@ found:
 	if (offset == 0)
 		qp->q.last_in |= INET_FRAG_FIRST_IN;
 
+	if (ip_hdr(skb)->frag_off & htons(IP_DF) &&
+	    skb->len + ihl > qp->q.max_size)
+		qp->q.max_size = skb->len + ihl;
+
 	if (qp->q.last_in == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) &&
 	    qp->q.meat == qp->q.len)
 		return ip_frag_reasm(qp, prev, dev);
@@ -646,9 +650,11 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
 	head->next = NULL;
 	head->dev = dev;
 	head->tstamp = qp->q.stamp;
+	IPCB(head)->frag_max_size = qp->q.max_size;
 
 	iph = ip_hdr(head);
-	iph->frag_off = 0;
+	/* max_size != 0 implies at least one fragment had IP_DF set */
+	iph->frag_off = qp->q.max_size ? htons(IP_DF) : 0;
 	iph->tot_len = htons(len);
 	iph->tos |= ecn;
 	IP_INC_STATS_BH(net, IPSTATS_MIB_REASMOKS);
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index c196d749daf2..a5beab1dc958 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -467,7 +467,9 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
 
 	iph = ip_hdr(skb);
 
-	if (unlikely((iph->frag_off & htons(IP_DF)) && !skb->local_df)) {
+	if (unlikely(((iph->frag_off & htons(IP_DF)) && !skb->local_df) ||
+		     (IPCB(skb)->frag_max_size &&
+		      IPCB(skb)->frag_max_size > dst_mtu(&rt->dst)))) {
 		IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
 		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
 			  htonl(ip_skb_dst_mtu(skb)));
-- 
cgit v1.2.3


From 4cdd34084d539c758d00c5dc7bf95db2e4f2bc70 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Sun, 26 Aug 2012 19:13:58 +0200
Subject: netfilter: nf_conntrack_ipv6: improve fragmentation handling

The IPv6 conntrack fragmentation currently has a couple of shortcomings.
Fragmentes are collected in PREROUTING/OUTPUT, are defragmented, the
defragmented packet is then passed to conntrack, the resulting conntrack
information is attached to each original fragment and the fragments then
continue their way through the stack.

Helper invocation occurs in the POSTROUTING hook, at which point only
the original fragments are available. The result of this is that
fragmented packets are never passed to helpers.

This patch improves the situation in the following way:

- If a reassembled packet belongs to a connection that has a helper
  assigned, the reassembled packet is passed through the stack instead
  of the original fragments.

- During defragmentation, the largest received fragment size is stored.
  On output, the packet is refragmented if required. If the largest
  received fragment size exceeds the outgoing MTU, a "packet too big"
  message is generated, thus behaving as if the original fragments
  were passed through the stack from an outside point of view.

- The ipv6_helper() hook function can't receive fragments anymore for
  connections using a helper, so it is switched to use ipv6_skip_exthdr()
  instead of the netfilter specific nf_ct_ipv6_skip_exthdr() and the
  reassembled packets are passed to connection tracking helpers.

The result of this is that we can properly track fragmented packets, but
still generate ICMPv6 Packet too big messages if we would have before.

This patch is also required as a precondition for IPv6 NAT, where NAT
helpers might enlarge packets up to a point that they require
fragmentation. In that case we can't generate Packet too big messages
since the proper MTU can't be calculated in all cases (f.i. when
changing textual representation of a variable amount of addresses),
so the packet is transparently fragmented iff the original packet or
fragments would have fit the outgoing MTU.

IPVS parts by Jesper Dangaard Brouer <brouer@redhat.com>.

Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/linux/ipv6.h                           |  1 +
 net/ipv6/ip6_output.c                          |  7 +++--
 net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 41 ++++++++++++++++++++------
 net/ipv6/netfilter/nf_conntrack_reasm.c        | 19 ++++++++++--
 net/netfilter/ipvs/ip_vs_xmit.c                |  9 +++++-
 5 files changed, 62 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index 879db26ec401..0b94e91ed685 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -256,6 +256,7 @@ struct inet6_skb_parm {
 #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
 	__u16			dsthao;
 #endif
+	__u16			frag_max_size;
 
 #define IP6SKB_XFRM_TRANSFORMED	1
 #define IP6SKB_FORWARDED	2
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 5b2d63ed793e..a4f6263fddca 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -493,7 +493,8 @@ int ip6_forward(struct sk_buff *skb)
 	if (mtu < IPV6_MIN_MTU)
 		mtu = IPV6_MIN_MTU;
 
-	if (skb->len > mtu && !skb_is_gso(skb)) {
+	if ((!skb->local_df && skb->len > mtu && !skb_is_gso(skb)) ||
+	    (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)) {
 		/* Again, force OUTPUT device used as source address */
 		skb->dev = dst->dev;
 		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
@@ -636,7 +637,9 @@ int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
 	/* We must not fragment if the socket is set to force MTU discovery
 	 * or if the skb it not generated by a local socket.
 	 */
-	if (unlikely(!skb->local_df && skb->len > mtu)) {
+	if (unlikely(!skb->local_df && skb->len > mtu) ||
+		     (IP6CB(skb)->frag_max_size &&
+		      IP6CB(skb)->frag_max_size > mtu)) {
 		if (skb->sk && dst_allfrag(skb_dst(skb)))
 			sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);
 
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index 4794f96cf2e0..521ddca876f8 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -153,10 +153,10 @@ static unsigned int ipv6_helper(unsigned int hooknum,
 	const struct nf_conn_help *help;
 	const struct nf_conntrack_helper *helper;
 	enum ip_conntrack_info ctinfo;
-	unsigned int ret, protoff;
-	unsigned int extoff = (u8 *)(ipv6_hdr(skb) + 1) - skb->data;
-	unsigned char pnum = ipv6_hdr(skb)->nexthdr;
-
+	unsigned int ret;
+	__be16 frag_off;
+	int protoff;
+	u8 nexthdr;
 
 	/* This is where we call the helper: as the packet goes out. */
 	ct = nf_ct_get(skb, &ctinfo);
@@ -171,9 +171,10 @@ static unsigned int ipv6_helper(unsigned int hooknum,
 	if (!helper)
 		return NF_ACCEPT;
 
-	protoff = nf_ct_ipv6_skip_exthdr(skb, extoff, &pnum,
-					 skb->len - extoff);
-	if (protoff > skb->len || pnum == NEXTHDR_FRAGMENT) {
+	nexthdr = ipv6_hdr(skb)->nexthdr;
+	protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr,
+				   &frag_off);
+	if (protoff < 0 || (frag_off & htons(~0x7)) != 0) {
 		pr_debug("proto header not found\n");
 		return NF_ACCEPT;
 	}
@@ -199,9 +200,14 @@ static unsigned int ipv6_confirm(unsigned int hooknum,
 static unsigned int __ipv6_conntrack_in(struct net *net,
 					unsigned int hooknum,
 					struct sk_buff *skb,
+					const struct net_device *in,
+					const struct net_device *out,
 					int (*okfn)(struct sk_buff *))
 {
 	struct sk_buff *reasm = skb->nfct_reasm;
+	const struct nf_conn_help *help;
+	struct nf_conn *ct;
+	enum ip_conntrack_info ctinfo;
 
 	/* This packet is fragmented and has reassembled packet. */
 	if (reasm) {
@@ -213,6 +219,23 @@ static unsigned int __ipv6_conntrack_in(struct net *net,
 			if (ret != NF_ACCEPT)
 				return ret;
 		}
+
+		/* Conntrack helpers need the entire reassembled packet in the
+		 * POST_ROUTING hook.
+		 */
+		ct = nf_ct_get(reasm, &ctinfo);
+		if (ct != NULL && !nf_ct_is_untracked(ct)) {
+			help = nfct_help(ct);
+			if (help && help->helper) {
+				nf_conntrack_get_reasm(skb);
+				NF_HOOK_THRESH(NFPROTO_IPV6, hooknum, reasm,
+					       (struct net_device *)in,
+					       (struct net_device *)out,
+					       okfn, NF_IP6_PRI_CONNTRACK + 1);
+				return NF_DROP_ERR(-ECANCELED);
+			}
+		}
+
 		nf_conntrack_get(reasm->nfct);
 		skb->nfct = reasm->nfct;
 		skb->nfctinfo = reasm->nfctinfo;
@@ -228,7 +251,7 @@ static unsigned int ipv6_conntrack_in(unsigned int hooknum,
 				      const struct net_device *out,
 				      int (*okfn)(struct sk_buff *))
 {
-	return __ipv6_conntrack_in(dev_net(in), hooknum, skb, okfn);
+	return __ipv6_conntrack_in(dev_net(in), hooknum, skb, in, out, okfn);
 }
 
 static unsigned int ipv6_conntrack_local(unsigned int hooknum,
@@ -242,7 +265,7 @@ static unsigned int ipv6_conntrack_local(unsigned int hooknum,
 		net_notice_ratelimited("ipv6_conntrack_local: packet too short\n");
 		return NF_ACCEPT;
 	}
-	return __ipv6_conntrack_in(dev_net(out), hooknum, skb, okfn);
+	return __ipv6_conntrack_in(dev_net(out), hooknum, skb, in, out, okfn);
 }
 
 static struct nf_hook_ops ipv6_conntrack_ops[] __read_mostly = {
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index c9c78c2e666b..f94fb3ac2a79 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -190,6 +190,7 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb,
 			     const struct frag_hdr *fhdr, int nhoff)
 {
 	struct sk_buff *prev, *next;
+	unsigned int payload_len;
 	int offset, end;
 
 	if (fq->q.last_in & INET_FRAG_COMPLETE) {
@@ -197,8 +198,10 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb,
 		goto err;
 	}
 
+	payload_len = ntohs(ipv6_hdr(skb)->payload_len);
+
 	offset = ntohs(fhdr->frag_off) & ~0x7;
-	end = offset + (ntohs(ipv6_hdr(skb)->payload_len) -
+	end = offset + (payload_len -
 			((u8 *)(fhdr + 1) - (u8 *)(ipv6_hdr(skb) + 1)));
 
 	if ((unsigned int)end > IPV6_MAXPLEN) {
@@ -307,6 +310,8 @@ found:
 	skb->dev = NULL;
 	fq->q.stamp = skb->tstamp;
 	fq->q.meat += skb->len;
+	if (payload_len > fq->q.max_size)
+		fq->q.max_size = payload_len;
 	atomic_add(skb->truesize, &nf_init_frags.mem);
 
 	/* The first fragment.
@@ -412,10 +417,12 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev)
 	}
 	atomic_sub(head->truesize, &nf_init_frags.mem);
 
+	head->local_df = 1;
 	head->next = NULL;
 	head->dev = dev;
 	head->tstamp = fq->q.stamp;
 	ipv6_hdr(head)->payload_len = htons(payload_len);
+	IP6CB(head)->frag_max_size = sizeof(struct ipv6hdr) + fq->q.max_size;
 
 	/* Yes, and fold redundant checksum back. 8) */
 	if (head->ip_summed == CHECKSUM_COMPLETE)
@@ -592,6 +599,7 @@ void nf_ct_frag6_output(unsigned int hooknum, struct sk_buff *skb,
 			int (*okfn)(struct sk_buff *))
 {
 	struct sk_buff *s, *s2;
+	unsigned int ret = 0;
 
 	for (s = NFCT_FRAG6_CB(skb)->orig; s;) {
 		nf_conntrack_put_reasm(s->nfct_reasm);
@@ -601,8 +609,13 @@ void nf_ct_frag6_output(unsigned int hooknum, struct sk_buff *skb,
 		s2 = s->next;
 		s->next = NULL;
 
-		NF_HOOK_THRESH(NFPROTO_IPV6, hooknum, s, in, out, okfn,
-			       NF_IP6_PRI_CONNTRACK_DEFRAG + 1);
+		if (ret != -ECANCELED)
+			ret = NF_HOOK_THRESH(NFPROTO_IPV6, hooknum, s,
+					     in, out, okfn,
+					     NF_IP6_PRI_CONNTRACK_DEFRAG + 1);
+		else
+			kfree_skb(s);
+
 		s = s2;
 	}
 	nf_conntrack_put_reasm(skb);
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index 67a39786b0a1..56f6d5d81a77 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -88,7 +88,14 @@ __ip_vs_dst_check(struct ip_vs_dest *dest, u32 rtos)
 static inline bool
 __mtu_check_toobig_v6(const struct sk_buff *skb, u32 mtu)
 {
-	if (skb->len > mtu && !skb_is_gso(skb)) {
+	if (IP6CB(skb)->frag_max_size) {
+		/* frag_max_size tell us that, this packet have been
+		 * defragmented by netfilter IPv6 conntrack module.
+		 */
+		if (IP6CB(skb)->frag_max_size > mtu)
+			return true; /* largest fragment violate MTU */
+	}
+	else if (skb->len > mtu && !skb_is_gso(skb)) {
 		return true; /* Packet size violate MTU size */
 	}
 	return false;
-- 
cgit v1.2.3


From 051966c0c644a1c96092d4206e00704ade813c9a Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Sun, 26 Aug 2012 19:14:04 +0200
Subject: netfilter: nf_nat: add protoff argument to packet mangling functions

For mangling IPv6 packets the protocol header offset needs to be known
by the NAT packet mangling functions. Add a so far unused protoff argument
and convert the conntrack and NAT helpers to use it in preparation of
IPv6 NAT.

Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/linux/netfilter/nf_conntrack_amanda.h  |   1 +
 include/linux/netfilter/nf_conntrack_ftp.h     |   1 +
 include/linux/netfilter/nf_conntrack_h323.h    |  15 +-
 include/linux/netfilter/nf_conntrack_irc.h     |   1 +
 include/linux/netfilter/nf_conntrack_pptp.h    |   2 +
 include/linux/netfilter/nf_conntrack_sip.h     |  12 +-
 include/net/netfilter/nf_nat_helper.h          |  11 +-
 net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c |   6 +-
 net/ipv4/netfilter/nf_nat_amanda.c             |   3 +-
 net/ipv4/netfilter/nf_nat_ftp.c                |   3 +-
 net/ipv4/netfilter/nf_nat_h323.c               |  48 ++++---
 net/ipv4/netfilter/nf_nat_helper.c             |   9 +-
 net/ipv4/netfilter/nf_nat_irc.c                |   3 +-
 net/ipv4/netfilter/nf_nat_pptp.c               |   6 +-
 net/ipv4/netfilter/nf_nat_sip.c                |  96 +++++++------
 net/netfilter/ipvs/ip_vs_ftp.c                 |   1 +
 net/netfilter/nf_conntrack_amanda.c            |   5 +-
 net/netfilter/nf_conntrack_ftp.c               |   3 +-
 net/netfilter/nf_conntrack_h323_main.c         | 191 +++++++++++++++----------
 net/netfilter/nf_conntrack_irc.c               |   3 +-
 net/netfilter/nf_conntrack_pptp.c              |  18 +--
 net/netfilter/nf_conntrack_sip.c               |  95 +++++++-----
 22 files changed, 328 insertions(+), 205 deletions(-)

(limited to 'include')

diff --git a/include/linux/netfilter/nf_conntrack_amanda.h b/include/linux/netfilter/nf_conntrack_amanda.h
index 0bb5a6976bf3..4b59a1584959 100644
--- a/include/linux/netfilter/nf_conntrack_amanda.h
+++ b/include/linux/netfilter/nf_conntrack_amanda.h
@@ -4,6 +4,7 @@
 
 extern unsigned int (*nf_nat_amanda_hook)(struct sk_buff *skb,
 					  enum ip_conntrack_info ctinfo,
+					  unsigned int protoff,
 					  unsigned int matchoff,
 					  unsigned int matchlen,
 					  struct nf_conntrack_expect *exp);
diff --git a/include/linux/netfilter/nf_conntrack_ftp.h b/include/linux/netfilter/nf_conntrack_ftp.h
index 3e3aa08980c3..28f18df36525 100644
--- a/include/linux/netfilter/nf_conntrack_ftp.h
+++ b/include/linux/netfilter/nf_conntrack_ftp.h
@@ -34,6 +34,7 @@ struct nf_conntrack_expect;
 extern unsigned int (*nf_nat_ftp_hook)(struct sk_buff *skb,
 				       enum ip_conntrack_info ctinfo,
 				       enum nf_ct_ftp_type type,
+				       unsigned int protoff,
 				       unsigned int matchoff,
 				       unsigned int matchlen,
 				       struct nf_conntrack_expect *exp);
diff --git a/include/linux/netfilter/nf_conntrack_h323.h b/include/linux/netfilter/nf_conntrack_h323.h
index 26f9226ea72b..f381020eee92 100644
--- a/include/linux/netfilter/nf_conntrack_h323.h
+++ b/include/linux/netfilter/nf_conntrack_h323.h
@@ -36,12 +36,12 @@ extern void nf_conntrack_h245_expect(struct nf_conn *new,
 				     struct nf_conntrack_expect *this);
 extern void nf_conntrack_q931_expect(struct nf_conn *new,
 				     struct nf_conntrack_expect *this);
-extern int (*set_h245_addr_hook) (struct sk_buff *skb,
+extern int (*set_h245_addr_hook) (struct sk_buff *skb, unsigned int protoff,
 				  unsigned char **data, int dataoff,
 				  H245_TransportAddress *taddr,
 				  union nf_inet_addr *addr,
 				  __be16 port);
-extern int (*set_h225_addr_hook) (struct sk_buff *skb,
+extern int (*set_h225_addr_hook) (struct sk_buff *skb, unsigned int protoff,
 				  unsigned char **data, int dataoff,
 				  TransportAddress *taddr,
 				  union nf_inet_addr *addr,
@@ -49,40 +49,45 @@ extern int (*set_h225_addr_hook) (struct sk_buff *skb,
 extern int (*set_sig_addr_hook) (struct sk_buff *skb,
 				 struct nf_conn *ct,
 				 enum ip_conntrack_info ctinfo,
-				 unsigned char **data,
+				 unsigned int protoff, unsigned char **data,
 				 TransportAddress *taddr, int count);
 extern int (*set_ras_addr_hook) (struct sk_buff *skb,
 				 struct nf_conn *ct,
 				 enum ip_conntrack_info ctinfo,
-				 unsigned char **data,
+				 unsigned int protoff, unsigned char **data,
 				 TransportAddress *taddr, int count);
 extern int (*nat_rtp_rtcp_hook) (struct sk_buff *skb,
 				 struct nf_conn *ct,
 				 enum ip_conntrack_info ctinfo,
-				 unsigned char **data, int dataoff,
+				 unsigned int protoff, unsigned char **data,
+				 int dataoff,
 				 H245_TransportAddress *taddr,
 				 __be16 port, __be16 rtp_port,
 				 struct nf_conntrack_expect *rtp_exp,
 				 struct nf_conntrack_expect *rtcp_exp);
 extern int (*nat_t120_hook) (struct sk_buff *skb, struct nf_conn *ct,
 			     enum ip_conntrack_info ctinfo,
+			     unsigned int protoff,
 			     unsigned char **data, int dataoff,
 			     H245_TransportAddress *taddr, __be16 port,
 			     struct nf_conntrack_expect *exp);
 extern int (*nat_h245_hook) (struct sk_buff *skb, struct nf_conn *ct,
 			     enum ip_conntrack_info ctinfo,
+			     unsigned int protoff,
 			     unsigned char **data, int dataoff,
 			     TransportAddress *taddr, __be16 port,
 			     struct nf_conntrack_expect *exp);
 extern int (*nat_callforwarding_hook) (struct sk_buff *skb,
 				       struct nf_conn *ct,
 				       enum ip_conntrack_info ctinfo,
+				       unsigned int protoff,
 				       unsigned char **data, int dataoff,
 				       TransportAddress *taddr,
 				       __be16 port,
 				       struct nf_conntrack_expect *exp);
 extern int (*nat_q931_hook) (struct sk_buff *skb, struct nf_conn *ct,
 			     enum ip_conntrack_info ctinfo,
+			     unsigned int protoff,
 			     unsigned char **data, TransportAddress *taddr,
 			     int idx, __be16 port,
 			     struct nf_conntrack_expect *exp);
diff --git a/include/linux/netfilter/nf_conntrack_irc.h b/include/linux/netfilter/nf_conntrack_irc.h
index 36282bf71b63..4bb9bae67176 100644
--- a/include/linux/netfilter/nf_conntrack_irc.h
+++ b/include/linux/netfilter/nf_conntrack_irc.h
@@ -7,6 +7,7 @@
 
 extern unsigned int (*nf_nat_irc_hook)(struct sk_buff *skb,
 				       enum ip_conntrack_info ctinfo,
+				       unsigned int protoff,
 				       unsigned int matchoff,
 				       unsigned int matchlen,
 				       struct nf_conntrack_expect *exp);
diff --git a/include/linux/netfilter/nf_conntrack_pptp.h b/include/linux/netfilter/nf_conntrack_pptp.h
index 3bbde0c3a8a6..2ab2830316b7 100644
--- a/include/linux/netfilter/nf_conntrack_pptp.h
+++ b/include/linux/netfilter/nf_conntrack_pptp.h
@@ -303,12 +303,14 @@ struct nf_conntrack_expect;
 extern int
 (*nf_nat_pptp_hook_outbound)(struct sk_buff *skb,
 			     struct nf_conn *ct, enum ip_conntrack_info ctinfo,
+			     unsigned int protoff,
 			     struct PptpControlHeader *ctlh,
 			     union pptp_ctrl_union *pptpReq);
 
 extern int
 (*nf_nat_pptp_hook_inbound)(struct sk_buff *skb,
 			    struct nf_conn *ct, enum ip_conntrack_info ctinfo,
+			    unsigned int protoff,
 			    struct PptpControlHeader *ctlh,
 			    union pptp_ctrl_union *pptpReq);
 
diff --git a/include/linux/netfilter/nf_conntrack_sip.h b/include/linux/netfilter/nf_conntrack_sip.h
index 89f2a627f3f0..1afc669a393e 100644
--- a/include/linux/netfilter/nf_conntrack_sip.h
+++ b/include/linux/netfilter/nf_conntrack_sip.h
@@ -37,10 +37,12 @@ struct sdp_media_type {
 struct sip_handler {
 	const char	*method;
 	unsigned int	len;
-	int		(*request)(struct sk_buff *skb, unsigned int dataoff,
+	int		(*request)(struct sk_buff *skb, unsigned int protoff,
+				   unsigned int dataoff,
 				   const char **dptr, unsigned int *datalen,
 				   unsigned int cseq);
-	int		(*response)(struct sk_buff *skb, unsigned int dataoff,
+	int		(*response)(struct sk_buff *skb, unsigned int protoff,
+				    unsigned int dataoff,
 				    const char **dptr, unsigned int *datalen,
 				    unsigned int cseq, unsigned int code);
 };
@@ -105,11 +107,13 @@ enum sdp_header_types {
 };
 
 extern unsigned int (*nf_nat_sip_hook)(struct sk_buff *skb,
+				       unsigned int protoff,
 				       unsigned int dataoff,
 				       const char **dptr,
 				       unsigned int *datalen);
 extern void (*nf_nat_sip_seq_adjust_hook)(struct sk_buff *skb, s16 off);
 extern unsigned int (*nf_nat_sip_expect_hook)(struct sk_buff *skb,
+					      unsigned int protoff,
 					      unsigned int dataoff,
 					      const char **dptr,
 					      unsigned int *datalen,
@@ -117,6 +121,7 @@ extern unsigned int (*nf_nat_sip_expect_hook)(struct sk_buff *skb,
 					      unsigned int matchoff,
 					      unsigned int matchlen);
 extern unsigned int (*nf_nat_sdp_addr_hook)(struct sk_buff *skb,
+					    unsigned int protoff,
 					    unsigned int dataoff,
 					    const char **dptr,
 					    unsigned int *datalen,
@@ -125,6 +130,7 @@ extern unsigned int (*nf_nat_sdp_addr_hook)(struct sk_buff *skb,
 					    enum sdp_header_types term,
 					    const union nf_inet_addr *addr);
 extern unsigned int (*nf_nat_sdp_port_hook)(struct sk_buff *skb,
+					    unsigned int protoff,
 					    unsigned int dataoff,
 					    const char **dptr,
 					    unsigned int *datalen,
@@ -132,12 +138,14 @@ extern unsigned int (*nf_nat_sdp_port_hook)(struct sk_buff *skb,
 					    unsigned int matchlen,
 					    u_int16_t port);
 extern unsigned int (*nf_nat_sdp_session_hook)(struct sk_buff *skb,
+					       unsigned int protoff,
 					       unsigned int dataoff,
 					       const char **dptr,
 					       unsigned int *datalen,
 					       unsigned int sdpoff,
 					       const union nf_inet_addr *addr);
 extern unsigned int (*nf_nat_sdp_media_hook)(struct sk_buff *skb,
+					     unsigned int protoff,
 					     unsigned int dataoff,
 					     const char **dptr,
 					     unsigned int *datalen,
diff --git a/include/net/netfilter/nf_nat_helper.h b/include/net/netfilter/nf_nat_helper.h
index 7d8fb7b46c44..b4d6bfc2af03 100644
--- a/include/net/netfilter/nf_nat_helper.h
+++ b/include/net/netfilter/nf_nat_helper.h
@@ -10,6 +10,7 @@ struct sk_buff;
 extern int __nf_nat_mangle_tcp_packet(struct sk_buff *skb,
 				      struct nf_conn *ct,
 				      enum ip_conntrack_info ctinfo,
+				      unsigned int protoff,
 				      unsigned int match_offset,
 				      unsigned int match_len,
 				      const char *rep_buffer,
@@ -18,12 +19,13 @@ extern int __nf_nat_mangle_tcp_packet(struct sk_buff *skb,
 static inline int nf_nat_mangle_tcp_packet(struct sk_buff *skb,
 					   struct nf_conn *ct,
 					   enum ip_conntrack_info ctinfo,
+					   unsigned int protoff,
 					   unsigned int match_offset,
 					   unsigned int match_len,
 					   const char *rep_buffer,
 					   unsigned int rep_len)
 {
-	return __nf_nat_mangle_tcp_packet(skb, ct, ctinfo,
+	return __nf_nat_mangle_tcp_packet(skb, ct, ctinfo, protoff,
 					  match_offset, match_len,
 					  rep_buffer, rep_len, true);
 }
@@ -31,6 +33,7 @@ static inline int nf_nat_mangle_tcp_packet(struct sk_buff *skb,
 extern int nf_nat_mangle_udp_packet(struct sk_buff *skb,
 				    struct nf_conn *ct,
 				    enum ip_conntrack_info ctinfo,
+				    unsigned int protoff,
 				    unsigned int match_offset,
 				    unsigned int match_len,
 				    const char *rep_buffer,
@@ -41,10 +44,12 @@ extern void nf_nat_set_seq_adjust(struct nf_conn *ct,
 				  __be32 seq, s16 off);
 extern int nf_nat_seq_adjust(struct sk_buff *skb,
 			     struct nf_conn *ct,
-			     enum ip_conntrack_info ctinfo);
+			     enum ip_conntrack_info ctinfo,
+			     unsigned int protoff);
 extern int (*nf_nat_seq_adjust_hook)(struct sk_buff *skb,
 				     struct nf_conn *ct,
-				     enum ip_conntrack_info ctinfo);
+				     enum ip_conntrack_info ctinfo,
+				     unsigned int protoff);
 
 /* Setup NAT on this expected conntrack so it follows master, but goes
  * to port ct->master->saved_proto. */
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index e7ff2dcab6ce..4ada3295d9a7 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -31,7 +31,8 @@
 
 int (*nf_nat_seq_adjust_hook)(struct sk_buff *skb,
 			      struct nf_conn *ct,
-			      enum ip_conntrack_info ctinfo);
+			      enum ip_conntrack_info ctinfo,
+			      unsigned int protoff);
 EXPORT_SYMBOL_GPL(nf_nat_seq_adjust_hook);
 
 static bool ipv4_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff,
@@ -149,7 +150,8 @@ static unsigned int ipv4_confirm(unsigned int hooknum,
 		typeof(nf_nat_seq_adjust_hook) seq_adjust;
 
 		seq_adjust = rcu_dereference(nf_nat_seq_adjust_hook);
-		if (!seq_adjust || !seq_adjust(skb, ct, ctinfo)) {
+		if (!seq_adjust ||
+		    !seq_adjust(skb, ct, ctinfo, ip_hdrlen(skb))) {
 			NF_CT_STAT_INC_ATOMIC(nf_ct_net(ct), drop);
 			return NF_DROP;
 		}
diff --git a/net/ipv4/netfilter/nf_nat_amanda.c b/net/ipv4/netfilter/nf_nat_amanda.c
index 3c04d24e2976..75464b62f5f2 100644
--- a/net/ipv4/netfilter/nf_nat_amanda.c
+++ b/net/ipv4/netfilter/nf_nat_amanda.c
@@ -26,6 +26,7 @@ MODULE_ALIAS("ip_nat_amanda");
 
 static unsigned int help(struct sk_buff *skb,
 			 enum ip_conntrack_info ctinfo,
+			 unsigned int protoff,
 			 unsigned int matchoff,
 			 unsigned int matchlen,
 			 struct nf_conntrack_expect *exp)
@@ -61,7 +62,7 @@ static unsigned int help(struct sk_buff *skb,
 
 	sprintf(buffer, "%u", port);
 	ret = nf_nat_mangle_udp_packet(skb, exp->master, ctinfo,
-				       matchoff, matchlen,
+				       protoff, matchoff, matchlen,
 				       buffer, strlen(buffer));
 	if (ret != NF_ACCEPT)
 		nf_ct_unexpect_related(exp);
diff --git a/net/ipv4/netfilter/nf_nat_ftp.c b/net/ipv4/netfilter/nf_nat_ftp.c
index e462a957d080..5589f3af4a8e 100644
--- a/net/ipv4/netfilter/nf_nat_ftp.c
+++ b/net/ipv4/netfilter/nf_nat_ftp.c
@@ -55,6 +55,7 @@ static int nf_nat_ftp_fmt_cmd(enum nf_ct_ftp_type type,
 static unsigned int nf_nat_ftp(struct sk_buff *skb,
 			       enum ip_conntrack_info ctinfo,
 			       enum nf_ct_ftp_type type,
+			       unsigned int protoff,
 			       unsigned int matchoff,
 			       unsigned int matchlen,
 			       struct nf_conntrack_expect *exp)
@@ -100,7 +101,7 @@ static unsigned int nf_nat_ftp(struct sk_buff *skb,
 
 	pr_debug("calling nf_nat_mangle_tcp_packet\n");
 
-	if (!nf_nat_mangle_tcp_packet(skb, ct, ctinfo, matchoff,
+	if (!nf_nat_mangle_tcp_packet(skb, ct, ctinfo, protoff, matchoff,
 				      matchlen, buffer, buflen))
 		goto out;
 
diff --git a/net/ipv4/netfilter/nf_nat_h323.c b/net/ipv4/netfilter/nf_nat_h323.c
index c6784a18c1c4..d2c228db38b5 100644
--- a/net/ipv4/netfilter/nf_nat_h323.c
+++ b/net/ipv4/netfilter/nf_nat_h323.c
@@ -21,7 +21,7 @@
 #include <linux/netfilter/nf_conntrack_h323.h>
 
 /****************************************************************************/
-static int set_addr(struct sk_buff *skb,
+static int set_addr(struct sk_buff *skb, unsigned int protoff,
 		    unsigned char **data, int dataoff,
 		    unsigned int addroff, __be32 ip, __be16 port)
 {
@@ -40,7 +40,7 @@ static int set_addr(struct sk_buff *skb,
 
 	if (ip_hdr(skb)->protocol == IPPROTO_TCP) {
 		if (!nf_nat_mangle_tcp_packet(skb, ct, ctinfo,
-					      addroff, sizeof(buf),
+					      protoff, addroff, sizeof(buf),
 					      (char *) &buf, sizeof(buf))) {
 			net_notice_ratelimited("nf_nat_h323: nf_nat_mangle_tcp_packet error\n");
 			return -1;
@@ -54,7 +54,7 @@ static int set_addr(struct sk_buff *skb,
 		*data = skb->data + ip_hdrlen(skb) + th->doff * 4 + dataoff;
 	} else {
 		if (!nf_nat_mangle_udp_packet(skb, ct, ctinfo,
-					      addroff, sizeof(buf),
+					      protoff, addroff, sizeof(buf),
 					      (char *) &buf, sizeof(buf))) {
 			net_notice_ratelimited("nf_nat_h323: nf_nat_mangle_udp_packet error\n");
 			return -1;
@@ -69,22 +69,22 @@ static int set_addr(struct sk_buff *skb,
 }
 
 /****************************************************************************/
-static int set_h225_addr(struct sk_buff *skb,
+static int set_h225_addr(struct sk_buff *skb, unsigned int protoff,
 			 unsigned char **data, int dataoff,
 			 TransportAddress *taddr,
 			 union nf_inet_addr *addr, __be16 port)
 {
-	return set_addr(skb, data, dataoff, taddr->ipAddress.ip,
+	return set_addr(skb, protoff, data, dataoff, taddr->ipAddress.ip,
 			addr->ip, port);
 }
 
 /****************************************************************************/
-static int set_h245_addr(struct sk_buff *skb,
+static int set_h245_addr(struct sk_buff *skb, unsigned protoff,
 			 unsigned char **data, int dataoff,
 			 H245_TransportAddress *taddr,
 			 union nf_inet_addr *addr, __be16 port)
 {
-	return set_addr(skb, data, dataoff,
+	return set_addr(skb, protoff, data, dataoff,
 			taddr->unicastAddress.iPAddress.network,
 			addr->ip, port);
 }
@@ -92,7 +92,7 @@ static int set_h245_addr(struct sk_buff *skb,
 /****************************************************************************/
 static int set_sig_addr(struct sk_buff *skb, struct nf_conn *ct,
 			enum ip_conntrack_info ctinfo,
-			unsigned char **data,
+			unsigned int protoff, unsigned char **data,
 			TransportAddress *taddr, int count)
 {
 	const struct nf_ct_h323_master *info = nfct_help_data(ct);
@@ -118,7 +118,8 @@ static int set_sig_addr(struct sk_buff *skb, struct nf_conn *ct,
 					 &addr.ip, port,
 					 &ct->tuplehash[!dir].tuple.dst.u3.ip,
 					 info->sig_port[!dir]);
-				return set_h225_addr(skb, data, 0, &taddr[i],
+				return set_h225_addr(skb, protoff, data, 0,
+						     &taddr[i],
 						     &ct->tuplehash[!dir].
 						     tuple.dst.u3,
 						     info->sig_port[!dir]);
@@ -129,7 +130,8 @@ static int set_sig_addr(struct sk_buff *skb, struct nf_conn *ct,
 					 &addr.ip, port,
 					 &ct->tuplehash[!dir].tuple.src.u3.ip,
 					 info->sig_port[!dir]);
-				return set_h225_addr(skb, data, 0, &taddr[i],
+				return set_h225_addr(skb, protoff, data, 0,
+						     &taddr[i],
 						     &ct->tuplehash[!dir].
 						     tuple.src.u3,
 						     info->sig_port[!dir]);
@@ -143,7 +145,7 @@ static int set_sig_addr(struct sk_buff *skb, struct nf_conn *ct,
 /****************************************************************************/
 static int set_ras_addr(struct sk_buff *skb, struct nf_conn *ct,
 			enum ip_conntrack_info ctinfo,
-			unsigned char **data,
+			unsigned int protoff, unsigned char **data,
 			TransportAddress *taddr, int count)
 {
 	int dir = CTINFO2DIR(ctinfo);
@@ -159,7 +161,7 @@ static int set_ras_addr(struct sk_buff *skb, struct nf_conn *ct,
 				 &addr.ip, ntohs(port),
 				 &ct->tuplehash[!dir].tuple.dst.u3.ip,
 				 ntohs(ct->tuplehash[!dir].tuple.dst.u.udp.port));
-			return set_h225_addr(skb, data, 0, &taddr[i],
+			return set_h225_addr(skb, protoff, data, 0, &taddr[i],
 					     &ct->tuplehash[!dir].tuple.dst.u3,
 					     ct->tuplehash[!dir].tuple.
 								dst.u.udp.port);
@@ -172,7 +174,7 @@ static int set_ras_addr(struct sk_buff *skb, struct nf_conn *ct,
 /****************************************************************************/
 static int nat_rtp_rtcp(struct sk_buff *skb, struct nf_conn *ct,
 			enum ip_conntrack_info ctinfo,
-			unsigned char **data, int dataoff,
+			unsigned int protoff, unsigned char **data, int dataoff,
 			H245_TransportAddress *taddr,
 			__be16 port, __be16 rtp_port,
 			struct nf_conntrack_expect *rtp_exp,
@@ -244,7 +246,7 @@ static int nat_rtp_rtcp(struct sk_buff *skb, struct nf_conn *ct,
 	}
 
 	/* Modify signal */
-	if (set_h245_addr(skb, data, dataoff, taddr,
+	if (set_h245_addr(skb, protoff, data, dataoff, taddr,
 			  &ct->tuplehash[!dir].tuple.dst.u3,
 			  htons((port & htons(1)) ? nated_port + 1 :
 						    nated_port)) == 0) {
@@ -275,7 +277,7 @@ static int nat_rtp_rtcp(struct sk_buff *skb, struct nf_conn *ct,
 /****************************************************************************/
 static int nat_t120(struct sk_buff *skb, struct nf_conn *ct,
 		    enum ip_conntrack_info ctinfo,
-		    unsigned char **data, int dataoff,
+		    unsigned int protoff, unsigned char **data, int dataoff,
 		    H245_TransportAddress *taddr, __be16 port,
 		    struct nf_conntrack_expect *exp)
 {
@@ -307,7 +309,7 @@ static int nat_t120(struct sk_buff *skb, struct nf_conn *ct,
 	}
 
 	/* Modify signal */
-	if (set_h245_addr(skb, data, dataoff, taddr,
+	if (set_h245_addr(skb, protoff, data, dataoff, taddr,
 			  &ct->tuplehash[!dir].tuple.dst.u3,
 			  htons(nated_port)) < 0) {
 		nf_ct_unexpect_related(exp);
@@ -326,7 +328,7 @@ static int nat_t120(struct sk_buff *skb, struct nf_conn *ct,
 /****************************************************************************/
 static int nat_h245(struct sk_buff *skb, struct nf_conn *ct,
 		    enum ip_conntrack_info ctinfo,
-		    unsigned char **data, int dataoff,
+		    unsigned int protoff, unsigned char **data, int dataoff,
 		    TransportAddress *taddr, __be16 port,
 		    struct nf_conntrack_expect *exp)
 {
@@ -363,7 +365,7 @@ static int nat_h245(struct sk_buff *skb, struct nf_conn *ct,
 	}
 
 	/* Modify signal */
-	if (set_h225_addr(skb, data, dataoff, taddr,
+	if (set_h225_addr(skb, protoff, data, dataoff, taddr,
 			  &ct->tuplehash[!dir].tuple.dst.u3,
 			  htons(nated_port)) == 0) {
 		/* Save ports */
@@ -416,7 +418,8 @@ static void ip_nat_q931_expect(struct nf_conn *new,
 /****************************************************************************/
 static int nat_q931(struct sk_buff *skb, struct nf_conn *ct,
 		    enum ip_conntrack_info ctinfo,
-		    unsigned char **data, TransportAddress *taddr, int idx,
+		    unsigned int protoff, unsigned char **data,
+		    TransportAddress *taddr, int idx,
 		    __be16 port, struct nf_conntrack_expect *exp)
 {
 	struct nf_ct_h323_master *info = nfct_help_data(ct);
@@ -453,7 +456,7 @@ static int nat_q931(struct sk_buff *skb, struct nf_conn *ct,
 	}
 
 	/* Modify signal */
-	if (set_h225_addr(skb, data, 0, &taddr[idx],
+	if (set_h225_addr(skb, protoff, data, 0, &taddr[idx],
 			  &ct->tuplehash[!dir].tuple.dst.u3,
 			  htons(nated_port)) == 0) {
 		/* Save ports */
@@ -464,7 +467,7 @@ static int nat_q931(struct sk_buff *skb, struct nf_conn *ct,
 		if (idx > 0 &&
 		    get_h225_addr(ct, *data, &taddr[0], &addr, &port) &&
 		    (ntohl(addr.ip) & 0xff000000) == 0x7f000000) {
-			set_h225_addr(skb, data, 0, &taddr[0],
+			set_h225_addr(skb, protoff, data, 0, &taddr[0],
 				      &ct->tuplehash[!dir].tuple.dst.u3,
 				      info->sig_port[!dir]);
 		}
@@ -507,6 +510,7 @@ static void ip_nat_callforwarding_expect(struct nf_conn *new,
 /****************************************************************************/
 static int nat_callforwarding(struct sk_buff *skb, struct nf_conn *ct,
 			      enum ip_conntrack_info ctinfo,
+			      unsigned int protoff,
 			      unsigned char **data, int dataoff,
 			      TransportAddress *taddr, __be16 port,
 			      struct nf_conntrack_expect *exp)
@@ -541,7 +545,7 @@ static int nat_callforwarding(struct sk_buff *skb, struct nf_conn *ct,
 	}
 
 	/* Modify signal */
-	if (!set_h225_addr(skb, data, dataoff, taddr,
+	if (!set_h225_addr(skb, protoff, data, dataoff, taddr,
 			   &ct->tuplehash[!dir].tuple.dst.u3,
 			   htons(nated_port)) == 0) {
 		nf_ct_unexpect_related(exp);
diff --git a/net/ipv4/netfilter/nf_nat_helper.c b/net/ipv4/netfilter/nf_nat_helper.c
index 2e59ad0b90ca..2fefec5e757c 100644
--- a/net/ipv4/netfilter/nf_nat_helper.c
+++ b/net/ipv4/netfilter/nf_nat_helper.c
@@ -206,6 +206,7 @@ static void nf_nat_csum(struct sk_buff *skb, const struct iphdr *iph, void *data
 int __nf_nat_mangle_tcp_packet(struct sk_buff *skb,
 			       struct nf_conn *ct,
 			       enum ip_conntrack_info ctinfo,
+			       unsigned int protoff,
 			       unsigned int match_offset,
 			       unsigned int match_len,
 			       const char *rep_buffer,
@@ -257,6 +258,7 @@ int
 nf_nat_mangle_udp_packet(struct sk_buff *skb,
 			 struct nf_conn *ct,
 			 enum ip_conntrack_info ctinfo,
+			 unsigned int protoff,
 			 unsigned int match_offset,
 			 unsigned int match_len,
 			 const char *rep_buffer,
@@ -387,7 +389,8 @@ nf_nat_sack_adjust(struct sk_buff *skb,
 int
 nf_nat_seq_adjust(struct sk_buff *skb,
 		  struct nf_conn *ct,
-		  enum ip_conntrack_info ctinfo)
+		  enum ip_conntrack_info ctinfo,
+		  unsigned int protoff)
 {
 	struct tcphdr *tcph;
 	int dir;
@@ -401,10 +404,10 @@ nf_nat_seq_adjust(struct sk_buff *skb,
 	this_way = &nat->seq[dir];
 	other_way = &nat->seq[!dir];
 
-	if (!skb_make_writable(skb, ip_hdrlen(skb) + sizeof(*tcph)))
+	if (!skb_make_writable(skb, protoff + sizeof(*tcph)))
 		return 0;
 
-	tcph = (void *)skb->data + ip_hdrlen(skb);
+	tcph = (void *)skb->data + protoff;
 	if (after(ntohl(tcph->seq), this_way->correction_pos))
 		seqoff = this_way->offset_after;
 	else
diff --git a/net/ipv4/netfilter/nf_nat_irc.c b/net/ipv4/netfilter/nf_nat_irc.c
index 979ae165f4ef..5b0c20a1f08d 100644
--- a/net/ipv4/netfilter/nf_nat_irc.c
+++ b/net/ipv4/netfilter/nf_nat_irc.c
@@ -29,6 +29,7 @@ MODULE_ALIAS("ip_nat_irc");
 
 static unsigned int help(struct sk_buff *skb,
 			 enum ip_conntrack_info ctinfo,
+			 unsigned int protoff,
 			 unsigned int matchoff,
 			 unsigned int matchlen,
 			 struct nf_conntrack_expect *exp)
@@ -66,7 +67,7 @@ static unsigned int help(struct sk_buff *skb,
 		 buffer, &ip, port);
 
 	ret = nf_nat_mangle_tcp_packet(skb, exp->master, ctinfo,
-				       matchoff, matchlen, buffer,
+				       protoff, matchoff, matchlen, buffer,
 				       strlen(buffer));
 	if (ret != NF_ACCEPT)
 		nf_ct_unexpect_related(exp);
diff --git a/net/ipv4/netfilter/nf_nat_pptp.c b/net/ipv4/netfilter/nf_nat_pptp.c
index 388140881ebe..31ef890d894b 100644
--- a/net/ipv4/netfilter/nf_nat_pptp.c
+++ b/net/ipv4/netfilter/nf_nat_pptp.c
@@ -113,6 +113,7 @@ static int
 pptp_outbound_pkt(struct sk_buff *skb,
 		  struct nf_conn *ct,
 		  enum ip_conntrack_info ctinfo,
+		  unsigned int protoff,
 		  struct PptpControlHeader *ctlh,
 		  union pptp_ctrl_union *pptpReq)
 
@@ -175,7 +176,7 @@ pptp_outbound_pkt(struct sk_buff *skb,
 		 ntohs(REQ_CID(pptpReq, cid_off)), ntohs(new_callid));
 
 	/* mangle packet */
-	if (nf_nat_mangle_tcp_packet(skb, ct, ctinfo,
+	if (nf_nat_mangle_tcp_packet(skb, ct, ctinfo, protoff,
 				     cid_off + sizeof(struct pptp_pkt_hdr) +
 				     sizeof(struct PptpControlHeader),
 				     sizeof(new_callid), (char *)&new_callid,
@@ -216,6 +217,7 @@ static int
 pptp_inbound_pkt(struct sk_buff *skb,
 		 struct nf_conn *ct,
 		 enum ip_conntrack_info ctinfo,
+		 unsigned int protoff,
 		 struct PptpControlHeader *ctlh,
 		 union pptp_ctrl_union *pptpReq)
 {
@@ -268,7 +270,7 @@ pptp_inbound_pkt(struct sk_buff *skb,
 	pr_debug("altering peer call id from 0x%04x to 0x%04x\n",
 		 ntohs(REQ_CID(pptpReq, pcid_off)), ntohs(new_pcid));
 
-	if (nf_nat_mangle_tcp_packet(skb, ct, ctinfo,
+	if (nf_nat_mangle_tcp_packet(skb, ct, ctinfo, protoff,
 				     pcid_off + sizeof(struct pptp_pkt_hdr) +
 				     sizeof(struct PptpControlHeader),
 				     sizeof(new_pcid), (char *)&new_pcid,
diff --git a/net/ipv4/netfilter/nf_nat_sip.c b/net/ipv4/netfilter/nf_nat_sip.c
index 4ad9cf173992..df626af8413c 100644
--- a/net/ipv4/netfilter/nf_nat_sip.c
+++ b/net/ipv4/netfilter/nf_nat_sip.c
@@ -30,7 +30,8 @@ MODULE_DESCRIPTION("SIP NAT helper");
 MODULE_ALIAS("ip_nat_sip");
 
 
-static unsigned int mangle_packet(struct sk_buff *skb, unsigned int dataoff,
+static unsigned int mangle_packet(struct sk_buff *skb, unsigned int protoff,
+				  unsigned int dataoff,
 				  const char **dptr, unsigned int *datalen,
 				  unsigned int matchoff, unsigned int matchlen,
 				  const char *buffer, unsigned int buflen)
@@ -46,7 +47,7 @@ static unsigned int mangle_packet(struct sk_buff *skb, unsigned int dataoff,
 		matchoff += dataoff - baseoff;
 
 		if (!__nf_nat_mangle_tcp_packet(skb, ct, ctinfo,
-						matchoff, matchlen,
+						protoff, matchoff, matchlen,
 						buffer, buflen, false))
 			return 0;
 	} else {
@@ -54,7 +55,7 @@ static unsigned int mangle_packet(struct sk_buff *skb, unsigned int dataoff,
 		matchoff += dataoff - baseoff;
 
 		if (!nf_nat_mangle_udp_packet(skb, ct, ctinfo,
-					      matchoff, matchlen,
+					      protoff, matchoff, matchlen,
 					      buffer, buflen))
 			return 0;
 	}
@@ -65,7 +66,8 @@ static unsigned int mangle_packet(struct sk_buff *skb, unsigned int dataoff,
 	return 1;
 }
 
-static int map_addr(struct sk_buff *skb, unsigned int dataoff,
+static int map_addr(struct sk_buff *skb, unsigned int protoff,
+		    unsigned int dataoff,
 		    const char **dptr, unsigned int *datalen,
 		    unsigned int matchoff, unsigned int matchlen,
 		    union nf_inet_addr *addr, __be16 port)
@@ -94,11 +96,12 @@ static int map_addr(struct sk_buff *skb, unsigned int dataoff,
 
 	buflen = sprintf(buffer, "%pI4:%u", &newaddr, ntohs(newport));
 
-	return mangle_packet(skb, dataoff, dptr, datalen, matchoff, matchlen,
-			     buffer, buflen);
+	return mangle_packet(skb, protoff, dataoff, dptr, datalen,
+			     matchoff, matchlen, buffer, buflen);
 }
 
-static int map_sip_addr(struct sk_buff *skb, unsigned int dataoff,
+static int map_sip_addr(struct sk_buff *skb, unsigned int protoff,
+			unsigned int dataoff,
 			const char **dptr, unsigned int *datalen,
 			enum sip_header_types type)
 {
@@ -111,11 +114,12 @@ static int map_sip_addr(struct sk_buff *skb, unsigned int dataoff,
 	if (ct_sip_parse_header_uri(ct, *dptr, NULL, *datalen, type, NULL,
 				    &matchoff, &matchlen, &addr, &port) <= 0)
 		return 1;
-	return map_addr(skb, dataoff, dptr, datalen, matchoff, matchlen,
-			&addr, port);
+	return map_addr(skb, protoff, dataoff, dptr, datalen,
+			matchoff, matchlen, &addr, port);
 }
 
-static unsigned int ip_nat_sip(struct sk_buff *skb, unsigned int dataoff,
+static unsigned int ip_nat_sip(struct sk_buff *skb, unsigned int protoff,
+			       unsigned int dataoff,
 			       const char **dptr, unsigned int *datalen)
 {
 	enum ip_conntrack_info ctinfo;
@@ -132,8 +136,8 @@ static unsigned int ip_nat_sip(struct sk_buff *skb, unsigned int dataoff,
 		if (ct_sip_parse_request(ct, *dptr, *datalen,
 					 &matchoff, &matchlen,
 					 &addr, &port) > 0 &&
-		    !map_addr(skb, dataoff, dptr, datalen, matchoff, matchlen,
-			      &addr, port))
+		    !map_addr(skb, protoff, dataoff, dptr, datalen,
+			      matchoff, matchlen, &addr, port))
 			return NF_DROP;
 		request = 1;
 	} else
@@ -164,8 +168,8 @@ static unsigned int ip_nat_sip(struct sk_buff *skb, unsigned int dataoff,
 		}
 
 		olen = *datalen;
-		if (!map_addr(skb, dataoff, dptr, datalen, matchoff, matchlen,
-			      &addr, port))
+		if (!map_addr(skb, protoff, dataoff, dptr, datalen,
+			      matchoff, matchlen, &addr, port))
 			return NF_DROP;
 
 		matchend = matchoff + matchlen + *datalen - olen;
@@ -179,7 +183,7 @@ static unsigned int ip_nat_sip(struct sk_buff *skb, unsigned int dataoff,
 		    addr.ip != ct->tuplehash[!dir].tuple.dst.u3.ip) {
 			buflen = sprintf(buffer, "%pI4",
 					&ct->tuplehash[!dir].tuple.dst.u3.ip);
-			if (!mangle_packet(skb, dataoff, dptr, datalen,
+			if (!mangle_packet(skb, protoff, dataoff, dptr, datalen,
 					   poff, plen, buffer, buflen))
 				return NF_DROP;
 		}
@@ -193,7 +197,7 @@ static unsigned int ip_nat_sip(struct sk_buff *skb, unsigned int dataoff,
 		    addr.ip != ct->tuplehash[!dir].tuple.src.u3.ip) {
 			buflen = sprintf(buffer, "%pI4",
 					&ct->tuplehash[!dir].tuple.src.u3.ip);
-			if (!mangle_packet(skb, dataoff, dptr, datalen,
+			if (!mangle_packet(skb, protoff, dataoff, dptr, datalen,
 					   poff, plen, buffer, buflen))
 				return NF_DROP;
 		}
@@ -207,7 +211,7 @@ static unsigned int ip_nat_sip(struct sk_buff *skb, unsigned int dataoff,
 		    htons(n) != ct->tuplehash[!dir].tuple.src.u.udp.port) {
 			__be16 p = ct->tuplehash[!dir].tuple.src.u.udp.port;
 			buflen = sprintf(buffer, "%u", ntohs(p));
-			if (!mangle_packet(skb, dataoff, dptr, datalen,
+			if (!mangle_packet(skb, protoff, dataoff, dptr, datalen,
 					   poff, plen, buffer, buflen))
 				return NF_DROP;
 		}
@@ -221,13 +225,14 @@ next:
 				       SIP_HDR_CONTACT, &in_header,
 				       &matchoff, &matchlen,
 				       &addr, &port) > 0) {
-		if (!map_addr(skb, dataoff, dptr, datalen, matchoff, matchlen,
+		if (!map_addr(skb, protoff, dataoff, dptr, datalen,
+			      matchoff, matchlen,
 			      &addr, port))
 			return NF_DROP;
 	}
 
-	if (!map_sip_addr(skb, dataoff, dptr, datalen, SIP_HDR_FROM) ||
-	    !map_sip_addr(skb, dataoff, dptr, datalen, SIP_HDR_TO))
+	if (!map_sip_addr(skb, protoff, dataoff, dptr, datalen, SIP_HDR_FROM) ||
+	    !map_sip_addr(skb, protoff, dataoff, dptr, datalen, SIP_HDR_TO))
 		return NF_DROP;
 
 	return NF_ACCEPT;
@@ -272,7 +277,8 @@ static void ip_nat_sip_expected(struct nf_conn *ct,
 	}
 }
 
-static unsigned int ip_nat_sip_expect(struct sk_buff *skb, unsigned int dataoff,
+static unsigned int ip_nat_sip_expect(struct sk_buff *skb, unsigned int protoff,
+				      unsigned int dataoff,
 				      const char **dptr, unsigned int *datalen,
 				      struct nf_conntrack_expect *exp,
 				      unsigned int matchoff,
@@ -326,7 +332,7 @@ static unsigned int ip_nat_sip_expect(struct sk_buff *skb, unsigned int dataoff,
 	if (exp->tuple.dst.u3.ip != exp->saved_ip ||
 	    exp->tuple.dst.u.udp.port != exp->saved_proto.udp.port) {
 		buflen = sprintf(buffer, "%pI4:%u", &newip, port);
-		if (!mangle_packet(skb, dataoff, dptr, datalen,
+		if (!mangle_packet(skb, protoff, dataoff, dptr, datalen,
 				   matchoff, matchlen, buffer, buflen))
 			goto err;
 	}
@@ -337,7 +343,8 @@ err:
 	return NF_DROP;
 }
 
-static int mangle_content_len(struct sk_buff *skb, unsigned int dataoff,
+static int mangle_content_len(struct sk_buff *skb, unsigned int protoff,
+			      unsigned int dataoff,
 			      const char **dptr, unsigned int *datalen)
 {
 	enum ip_conntrack_info ctinfo;
@@ -359,11 +366,12 @@ static int mangle_content_len(struct sk_buff *skb, unsigned int dataoff,
 		return 0;
 
 	buflen = sprintf(buffer, "%u", c_len);
-	return mangle_packet(skb, dataoff, dptr, datalen, matchoff, matchlen,
-			     buffer, buflen);
+	return mangle_packet(skb, protoff, dataoff, dptr, datalen,
+			     matchoff, matchlen, buffer, buflen);
 }
 
-static int mangle_sdp_packet(struct sk_buff *skb, unsigned int dataoff,
+static int mangle_sdp_packet(struct sk_buff *skb, unsigned int protoff,
+			     unsigned int dataoff,
 			     const char **dptr, unsigned int *datalen,
 			     unsigned int sdpoff,
 			     enum sdp_header_types type,
@@ -377,11 +385,12 @@ static int mangle_sdp_packet(struct sk_buff *skb, unsigned int dataoff,
 	if (ct_sip_get_sdp_header(ct, *dptr, sdpoff, *datalen, type, term,
 				  &matchoff, &matchlen) <= 0)
 		return -ENOENT;
-	return mangle_packet(skb, dataoff, dptr, datalen, matchoff, matchlen,
-			     buffer, buflen) ? 0 : -EINVAL;
+	return mangle_packet(skb, protoff, dataoff, dptr, datalen,
+			     matchoff, matchlen, buffer, buflen) ? 0 : -EINVAL;
 }
 
-static unsigned int ip_nat_sdp_addr(struct sk_buff *skb, unsigned int dataoff,
+static unsigned int ip_nat_sdp_addr(struct sk_buff *skb, unsigned int protoff,
+				    unsigned int dataoff,
 				    const char **dptr, unsigned int *datalen,
 				    unsigned int sdpoff,
 				    enum sdp_header_types type,
@@ -392,14 +401,15 @@ static unsigned int ip_nat_sdp_addr(struct sk_buff *skb, unsigned int dataoff,
 	unsigned int buflen;
 
 	buflen = sprintf(buffer, "%pI4", &addr->ip);
-	if (mangle_sdp_packet(skb, dataoff, dptr, datalen, sdpoff, type, term,
-			      buffer, buflen))
+	if (mangle_sdp_packet(skb, protoff, dataoff, dptr, datalen,
+			      sdpoff, type, term, buffer, buflen))
 		return 0;
 
-	return mangle_content_len(skb, dataoff, dptr, datalen);
+	return mangle_content_len(skb, protoff, dataoff, dptr, datalen);
 }
 
-static unsigned int ip_nat_sdp_port(struct sk_buff *skb, unsigned int dataoff,
+static unsigned int ip_nat_sdp_port(struct sk_buff *skb, unsigned int protoff,
+				    unsigned int dataoff,
 				    const char **dptr, unsigned int *datalen,
 				    unsigned int matchoff,
 				    unsigned int matchlen,
@@ -409,14 +419,15 @@ static unsigned int ip_nat_sdp_port(struct sk_buff *skb, unsigned int dataoff,
 	unsigned int buflen;
 
 	buflen = sprintf(buffer, "%u", port);
-	if (!mangle_packet(skb, dataoff, dptr, datalen, matchoff, matchlen,
-			   buffer, buflen))
+	if (!mangle_packet(skb, protoff, dataoff, dptr, datalen,
+			   matchoff, matchlen, buffer, buflen))
 		return 0;
 
-	return mangle_content_len(skb, dataoff, dptr, datalen);
+	return mangle_content_len(skb, protoff, dataoff, dptr, datalen);
 }
 
-static unsigned int ip_nat_sdp_session(struct sk_buff *skb, unsigned int dataoff,
+static unsigned int ip_nat_sdp_session(struct sk_buff *skb, unsigned int protoff,
+				       unsigned int dataoff,
 				       const char **dptr, unsigned int *datalen,
 				       unsigned int sdpoff,
 				       const union nf_inet_addr *addr)
@@ -426,12 +437,12 @@ static unsigned int ip_nat_sdp_session(struct sk_buff *skb, unsigned int dataoff
 
 	/* Mangle session description owner and contact addresses */
 	buflen = sprintf(buffer, "%pI4", &addr->ip);
-	if (mangle_sdp_packet(skb, dataoff, dptr, datalen, sdpoff,
+	if (mangle_sdp_packet(skb, protoff, dataoff, dptr, datalen, sdpoff,
 			       SDP_HDR_OWNER_IP4, SDP_HDR_MEDIA,
 			       buffer, buflen))
 		return 0;
 
-	switch (mangle_sdp_packet(skb, dataoff, dptr, datalen, sdpoff,
+	switch (mangle_sdp_packet(skb, protoff, dataoff, dptr, datalen, sdpoff,
 				  SDP_HDR_CONNECTION_IP4, SDP_HDR_MEDIA,
 				  buffer, buflen)) {
 	case 0:
@@ -448,12 +459,13 @@ static unsigned int ip_nat_sdp_session(struct sk_buff *skb, unsigned int dataoff
 		return 0;
 	}
 
-	return mangle_content_len(skb, dataoff, dptr, datalen);
+	return mangle_content_len(skb, protoff, dataoff, dptr, datalen);
 }
 
 /* So, this packet has hit the connection tracking matching code.
    Mangle it, and change the expectation to match the new version. */
-static unsigned int ip_nat_sdp_media(struct sk_buff *skb, unsigned int dataoff,
+static unsigned int ip_nat_sdp_media(struct sk_buff *skb, unsigned int protoff,
+				     unsigned int dataoff,
 				     const char **dptr, unsigned int *datalen,
 				     struct nf_conntrack_expect *rtp_exp,
 				     struct nf_conntrack_expect *rtcp_exp,
@@ -514,7 +526,7 @@ static unsigned int ip_nat_sdp_media(struct sk_buff *skb, unsigned int dataoff,
 
 	/* Update media port. */
 	if (rtp_exp->tuple.dst.u.udp.port != rtp_exp->saved_proto.udp.port &&
-	    !ip_nat_sdp_port(skb, dataoff, dptr, datalen,
+	    !ip_nat_sdp_port(skb, protoff, dataoff, dptr, datalen,
 			     mediaoff, medialen, port))
 		goto err2;
 
diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c
index ad70b7e4ac4a..4f53a5f04437 100644
--- a/net/netfilter/ipvs/ip_vs_ftp.c
+++ b/net/netfilter/ipvs/ip_vs_ftp.c
@@ -268,6 +268,7 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
 			 * packet.
 			 */
 			ret = nf_nat_mangle_tcp_packet(skb, ct, ctinfo,
+						       iph->ihl * 4,
 						       start-data, end-start,
 						       buf, buf_len);
 			if (ret) {
diff --git a/net/netfilter/nf_conntrack_amanda.c b/net/netfilter/nf_conntrack_amanda.c
index 184c0dc6e437..e0212b5494b1 100644
--- a/net/netfilter/nf_conntrack_amanda.c
+++ b/net/netfilter/nf_conntrack_amanda.c
@@ -40,6 +40,7 @@ MODULE_PARM_DESC(ts_algo, "textsearch algorithm to use (default kmp)");
 
 unsigned int (*nf_nat_amanda_hook)(struct sk_buff *skb,
 				   enum ip_conntrack_info ctinfo,
+				   unsigned int protoff,
 				   unsigned int matchoff,
 				   unsigned int matchlen,
 				   struct nf_conntrack_expect *exp)
@@ -156,8 +157,8 @@ static int amanda_help(struct sk_buff *skb,
 		nf_nat_amanda = rcu_dereference(nf_nat_amanda_hook);
 		if (nf_nat_amanda && nf_ct_l3num(ct) == NFPROTO_IPV4 &&
 		    ct->status & IPS_NAT_MASK)
-			ret = nf_nat_amanda(skb, ctinfo, off - dataoff,
-					    len, exp);
+			ret = nf_nat_amanda(skb, ctinfo, protoff,
+					    off - dataoff, len, exp);
 		else if (nf_ct_expect_related(exp) != 0)
 			ret = NF_DROP;
 		nf_ct_expect_put(exp);
diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c
index 3e1587e63c03..c0f4a5ba9016 100644
--- a/net/netfilter/nf_conntrack_ftp.c
+++ b/net/netfilter/nf_conntrack_ftp.c
@@ -48,6 +48,7 @@ module_param(loose, bool, 0600);
 unsigned int (*nf_nat_ftp_hook)(struct sk_buff *skb,
 				enum ip_conntrack_info ctinfo,
 				enum nf_ct_ftp_type type,
+				unsigned int protoff,
 				unsigned int matchoff,
 				unsigned int matchlen,
 				struct nf_conntrack_expect *exp);
@@ -490,7 +491,7 @@ static int help(struct sk_buff *skb,
 	if (nf_nat_ftp && nf_ct_l3num(ct) == NFPROTO_IPV4 &&
 	    ct->status & IPS_NAT_MASK)
 		ret = nf_nat_ftp(skb, ctinfo, search[dir][i].ftptype,
-				 matchoff, matchlen, exp);
+				 protoff, matchoff, matchlen, exp);
 	else {
 		/* Can't expect this?  Best to drop packet now. */
 		if (nf_ct_expect_related(exp) != 0)
diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c
index 517c5e3fe7c6..1b30b0dee708 100644
--- a/net/netfilter/nf_conntrack_h323_main.c
+++ b/net/netfilter/nf_conntrack_h323_main.c
@@ -49,12 +49,12 @@ MODULE_PARM_DESC(callforward_filter, "only create call forwarding expectations "
 				     "(determined by routing information)");
 
 /* Hooks for NAT */
-int (*set_h245_addr_hook) (struct sk_buff *skb,
+int (*set_h245_addr_hook) (struct sk_buff *skb, unsigned int protoff,
 			   unsigned char **data, int dataoff,
 			   H245_TransportAddress *taddr,
 			   union nf_inet_addr *addr, __be16 port)
 			   __read_mostly;
-int (*set_h225_addr_hook) (struct sk_buff *skb,
+int (*set_h225_addr_hook) (struct sk_buff *skb, unsigned int protoff,
 			   unsigned char **data, int dataoff,
 			   TransportAddress *taddr,
 			   union nf_inet_addr *addr, __be16 port)
@@ -62,16 +62,17 @@ int (*set_h225_addr_hook) (struct sk_buff *skb,
 int (*set_sig_addr_hook) (struct sk_buff *skb,
 			  struct nf_conn *ct,
 			  enum ip_conntrack_info ctinfo,
-			  unsigned char **data,
+			  unsigned int protoff, unsigned char **data,
 			  TransportAddress *taddr, int count) __read_mostly;
 int (*set_ras_addr_hook) (struct sk_buff *skb,
 			  struct nf_conn *ct,
 			  enum ip_conntrack_info ctinfo,
-			  unsigned char **data,
+			  unsigned int protoff, unsigned char **data,
 			  TransportAddress *taddr, int count) __read_mostly;
 int (*nat_rtp_rtcp_hook) (struct sk_buff *skb,
 			  struct nf_conn *ct,
 			  enum ip_conntrack_info ctinfo,
+			  unsigned int protoff,
 			  unsigned char **data, int dataoff,
 			  H245_TransportAddress *taddr,
 			  __be16 port, __be16 rtp_port,
@@ -80,24 +81,28 @@ int (*nat_rtp_rtcp_hook) (struct sk_buff *skb,
 int (*nat_t120_hook) (struct sk_buff *skb,
 		      struct nf_conn *ct,
 		      enum ip_conntrack_info ctinfo,
+		      unsigned int protoff,
 		      unsigned char **data, int dataoff,
 		      H245_TransportAddress *taddr, __be16 port,
 		      struct nf_conntrack_expect *exp) __read_mostly;
 int (*nat_h245_hook) (struct sk_buff *skb,
 		      struct nf_conn *ct,
 		      enum ip_conntrack_info ctinfo,
+		      unsigned int protoff,
 		      unsigned char **data, int dataoff,
 		      TransportAddress *taddr, __be16 port,
 		      struct nf_conntrack_expect *exp) __read_mostly;
 int (*nat_callforwarding_hook) (struct sk_buff *skb,
 				struct nf_conn *ct,
 				enum ip_conntrack_info ctinfo,
+				unsigned int protoff,
 				unsigned char **data, int dataoff,
 				TransportAddress *taddr, __be16 port,
 				struct nf_conntrack_expect *exp) __read_mostly;
 int (*nat_q931_hook) (struct sk_buff *skb,
 		      struct nf_conn *ct,
 		      enum ip_conntrack_info ctinfo,
+		      unsigned int protoff,
 		      unsigned char **data, TransportAddress *taddr, int idx,
 		      __be16 port, struct nf_conntrack_expect *exp)
 		      __read_mostly;
@@ -251,6 +256,7 @@ static int get_h245_addr(struct nf_conn *ct, const unsigned char *data,
 /****************************************************************************/
 static int expect_rtp_rtcp(struct sk_buff *skb, struct nf_conn *ct,
 			   enum ip_conntrack_info ctinfo,
+			   unsigned int protoff,
 			   unsigned char **data, int dataoff,
 			   H245_TransportAddress *taddr)
 {
@@ -298,7 +304,7 @@ static int expect_rtp_rtcp(struct sk_buff *skb, struct nf_conn *ct,
 		   nf_ct_l3num(ct) == NFPROTO_IPV4 &&
 		   ct->status & IPS_NAT_MASK) {
 		/* NAT needed */
-		ret = nat_rtp_rtcp(skb, ct, ctinfo, data, dataoff,
+		ret = nat_rtp_rtcp(skb, ct, ctinfo, protoff, data, dataoff,
 				   taddr, port, rtp_port, rtp_exp, rtcp_exp);
 	} else {		/* Conntrack only */
 		if (nf_ct_expect_related(rtp_exp) == 0) {
@@ -325,6 +331,7 @@ static int expect_rtp_rtcp(struct sk_buff *skb, struct nf_conn *ct,
 static int expect_t120(struct sk_buff *skb,
 		       struct nf_conn *ct,
 		       enum ip_conntrack_info ctinfo,
+		       unsigned int protoff,
 		       unsigned char **data, int dataoff,
 		       H245_TransportAddress *taddr)
 {
@@ -357,7 +364,7 @@ static int expect_t120(struct sk_buff *skb,
 	    nf_ct_l3num(ct) == NFPROTO_IPV4 &&
 	    ct->status & IPS_NAT_MASK) {
 		/* NAT needed */
-		ret = nat_t120(skb, ct, ctinfo, data, dataoff, taddr,
+		ret = nat_t120(skb, ct, ctinfo, protoff, data, dataoff, taddr,
 			       port, exp);
 	} else {		/* Conntrack only */
 		if (nf_ct_expect_related(exp) == 0) {
@@ -376,6 +383,7 @@ static int expect_t120(struct sk_buff *skb,
 static int process_h245_channel(struct sk_buff *skb,
 				struct nf_conn *ct,
 				enum ip_conntrack_info ctinfo,
+				unsigned int protoff,
 				unsigned char **data, int dataoff,
 				H2250LogicalChannelParameters *channel)
 {
@@ -383,7 +391,7 @@ static int process_h245_channel(struct sk_buff *skb,
 
 	if (channel->options & eH2250LogicalChannelParameters_mediaChannel) {
 		/* RTP */
-		ret = expect_rtp_rtcp(skb, ct, ctinfo, data, dataoff,
+		ret = expect_rtp_rtcp(skb, ct, ctinfo, protoff, data, dataoff,
 				      &channel->mediaChannel);
 		if (ret < 0)
 			return -1;
@@ -392,7 +400,7 @@ static int process_h245_channel(struct sk_buff *skb,
 	if (channel->
 	    options & eH2250LogicalChannelParameters_mediaControlChannel) {
 		/* RTCP */
-		ret = expect_rtp_rtcp(skb, ct, ctinfo, data, dataoff,
+		ret = expect_rtp_rtcp(skb, ct, ctinfo, protoff, data, dataoff,
 				      &channel->mediaControlChannel);
 		if (ret < 0)
 			return -1;
@@ -404,6 +412,7 @@ static int process_h245_channel(struct sk_buff *skb,
 /****************************************************************************/
 static int process_olc(struct sk_buff *skb, struct nf_conn *ct,
 		       enum ip_conntrack_info ctinfo,
+		       unsigned int protoff,
 		       unsigned char **data, int dataoff,
 		       OpenLogicalChannel *olc)
 {
@@ -414,7 +423,8 @@ static int process_olc(struct sk_buff *skb, struct nf_conn *ct,
 	if (olc->forwardLogicalChannelParameters.multiplexParameters.choice ==
 	    eOpenLogicalChannel_forwardLogicalChannelParameters_multiplexParameters_h2250LogicalChannelParameters)
 	{
-		ret = process_h245_channel(skb, ct, ctinfo, data, dataoff,
+		ret = process_h245_channel(skb, ct, ctinfo,
+					   protoff, data, dataoff,
 					   &olc->
 					   forwardLogicalChannelParameters.
 					   multiplexParameters.
@@ -432,7 +442,8 @@ static int process_olc(struct sk_buff *skb, struct nf_conn *ct,
 		eOpenLogicalChannel_reverseLogicalChannelParameters_multiplexParameters_h2250LogicalChannelParameters))
 	{
 		ret =
-		    process_h245_channel(skb, ct, ctinfo, data, dataoff,
+		    process_h245_channel(skb, ct, ctinfo,
+					 protoff, data, dataoff,
 					 &olc->
 					 reverseLogicalChannelParameters.
 					 multiplexParameters.
@@ -450,7 +461,7 @@ static int process_olc(struct sk_buff *skb, struct nf_conn *ct,
 	    t120.choice == eDataProtocolCapability_separateLANStack &&
 	    olc->separateStack.networkAddress.choice ==
 	    eNetworkAccessParameters_networkAddress_localAreaAddress) {
-		ret = expect_t120(skb, ct, ctinfo, data, dataoff,
+		ret = expect_t120(skb, ct, ctinfo, protoff, data, dataoff,
 				  &olc->separateStack.networkAddress.
 				  localAreaAddress);
 		if (ret < 0)
@@ -463,7 +474,7 @@ static int process_olc(struct sk_buff *skb, struct nf_conn *ct,
 /****************************************************************************/
 static int process_olca(struct sk_buff *skb, struct nf_conn *ct,
 			enum ip_conntrack_info ctinfo,
-			unsigned char **data, int dataoff,
+			unsigned int protoff, unsigned char **data, int dataoff,
 			OpenLogicalChannelAck *olca)
 {
 	H2250LogicalChannelAckParameters *ack;
@@ -479,7 +490,8 @@ static int process_olca(struct sk_buff *skb, struct nf_conn *ct,
 		choice ==
 		eOpenLogicalChannelAck_reverseLogicalChannelParameters_multiplexParameters_h2250LogicalChannelParameters))
 	{
-		ret = process_h245_channel(skb, ct, ctinfo, data, dataoff,
+		ret = process_h245_channel(skb, ct, ctinfo,
+					   protoff, data, dataoff,
 					   &olca->
 					   reverseLogicalChannelParameters.
 					   multiplexParameters.
@@ -498,7 +510,8 @@ static int process_olca(struct sk_buff *skb, struct nf_conn *ct,
 		if (ack->options &
 		    eH2250LogicalChannelAckParameters_mediaChannel) {
 			/* RTP */
-			ret = expect_rtp_rtcp(skb, ct, ctinfo, data, dataoff,
+			ret = expect_rtp_rtcp(skb, ct, ctinfo,
+					      protoff, data, dataoff,
 					      &ack->mediaChannel);
 			if (ret < 0)
 				return -1;
@@ -507,7 +520,8 @@ static int process_olca(struct sk_buff *skb, struct nf_conn *ct,
 		if (ack->options &
 		    eH2250LogicalChannelAckParameters_mediaControlChannel) {
 			/* RTCP */
-			ret = expect_rtp_rtcp(skb, ct, ctinfo, data, dataoff,
+			ret = expect_rtp_rtcp(skb, ct, ctinfo,
+					      protoff, data, dataoff,
 					      &ack->mediaControlChannel);
 			if (ret < 0)
 				return -1;
@@ -517,7 +531,7 @@ static int process_olca(struct sk_buff *skb, struct nf_conn *ct,
 	if ((olca->options & eOpenLogicalChannelAck_separateStack) &&
 		olca->separateStack.networkAddress.choice ==
 		eNetworkAccessParameters_networkAddress_localAreaAddress) {
-		ret = expect_t120(skb, ct, ctinfo, data, dataoff,
+		ret = expect_t120(skb, ct, ctinfo, protoff, data, dataoff,
 				  &olca->separateStack.networkAddress.
 				  localAreaAddress);
 		if (ret < 0)
@@ -530,14 +544,15 @@ static int process_olca(struct sk_buff *skb, struct nf_conn *ct,
 /****************************************************************************/
 static int process_h245(struct sk_buff *skb, struct nf_conn *ct,
 			enum ip_conntrack_info ctinfo,
-			unsigned char **data, int dataoff,
+			unsigned int protoff, unsigned char **data, int dataoff,
 			MultimediaSystemControlMessage *mscm)
 {
 	switch (mscm->choice) {
 	case eMultimediaSystemControlMessage_request:
 		if (mscm->request.choice ==
 		    eRequestMessage_openLogicalChannel) {
-			return process_olc(skb, ct, ctinfo, data, dataoff,
+			return process_olc(skb, ct, ctinfo,
+					   protoff, data, dataoff,
 					   &mscm->request.openLogicalChannel);
 		}
 		pr_debug("nf_ct_h323: H.245 Request %d\n",
@@ -546,7 +561,8 @@ static int process_h245(struct sk_buff *skb, struct nf_conn *ct,
 	case eMultimediaSystemControlMessage_response:
 		if (mscm->response.choice ==
 		    eResponseMessage_openLogicalChannelAck) {
-			return process_olca(skb, ct, ctinfo, data, dataoff,
+			return process_olca(skb, ct, ctinfo,
+					    protoff, data, dataoff,
 					    &mscm->response.
 					    openLogicalChannelAck);
 		}
@@ -597,7 +613,8 @@ static int h245_help(struct sk_buff *skb, unsigned int protoff,
 		}
 
 		/* Process H.245 signal */
-		if (process_h245(skb, ct, ctinfo, &data, dataoff, &mscm) < 0)
+		if (process_h245(skb, ct, ctinfo, protoff,
+				 &data, dataoff, &mscm) < 0)
 			goto drop;
 	}
 
@@ -661,7 +678,7 @@ int get_h225_addr(struct nf_conn *ct, unsigned char *data,
 /****************************************************************************/
 static int expect_h245(struct sk_buff *skb, struct nf_conn *ct,
 		       enum ip_conntrack_info ctinfo,
-		       unsigned char **data, int dataoff,
+		       unsigned int protoff, unsigned char **data, int dataoff,
 		       TransportAddress *taddr)
 {
 	int dir = CTINFO2DIR(ctinfo);
@@ -693,7 +710,7 @@ static int expect_h245(struct sk_buff *skb, struct nf_conn *ct,
 	    nf_ct_l3num(ct) == NFPROTO_IPV4 &&
 	    ct->status & IPS_NAT_MASK) {
 		/* NAT needed */
-		ret = nat_h245(skb, ct, ctinfo, data, dataoff, taddr,
+		ret = nat_h245(skb, ct, ctinfo, protoff, data, dataoff, taddr,
 			       port, exp);
 	} else {		/* Conntrack only */
 		if (nf_ct_expect_related(exp) == 0) {
@@ -779,6 +796,7 @@ static int callforward_do_filter(const union nf_inet_addr *src,
 static int expect_callforwarding(struct sk_buff *skb,
 				 struct nf_conn *ct,
 				 enum ip_conntrack_info ctinfo,
+				 unsigned int protoff,
 				 unsigned char **data, int dataoff,
 				 TransportAddress *taddr)
 {
@@ -817,7 +835,8 @@ static int expect_callforwarding(struct sk_buff *skb,
 	    nf_ct_l3num(ct) == NFPROTO_IPV4 &&
 	    ct->status & IPS_NAT_MASK) {
 		/* Need NAT */
-		ret = nat_callforwarding(skb, ct, ctinfo, data, dataoff,
+		ret = nat_callforwarding(skb, ct, ctinfo,
+					 protoff, data, dataoff,
 					 taddr, port, exp);
 	} else {		/* Conntrack only */
 		if (nf_ct_expect_related(exp) == 0) {
@@ -835,6 +854,7 @@ static int expect_callforwarding(struct sk_buff *skb,
 /****************************************************************************/
 static int process_setup(struct sk_buff *skb, struct nf_conn *ct,
 			 enum ip_conntrack_info ctinfo,
+			 unsigned int protoff,
 			 unsigned char **data, int dataoff,
 			 Setup_UUIE *setup)
 {
@@ -848,7 +868,7 @@ static int process_setup(struct sk_buff *skb, struct nf_conn *ct,
 	pr_debug("nf_ct_q931: Setup\n");
 
 	if (setup->options & eSetup_UUIE_h245Address) {
-		ret = expect_h245(skb, ct, ctinfo, data, dataoff,
+		ret = expect_h245(skb, ct, ctinfo, protoff, data, dataoff,
 				  &setup->h245Address);
 		if (ret < 0)
 			return -1;
@@ -864,7 +884,7 @@ static int process_setup(struct sk_buff *skb, struct nf_conn *ct,
 		pr_debug("nf_ct_q931: set destCallSignalAddress %pI6:%hu->%pI6:%hu\n",
 			 &addr, ntohs(port), &ct->tuplehash[!dir].tuple.src.u3,
 			 ntohs(ct->tuplehash[!dir].tuple.src.u.tcp.port));
-		ret = set_h225_addr(skb, data, dataoff,
+		ret = set_h225_addr(skb, protoff, data, dataoff,
 				    &setup->destCallSignalAddress,
 				    &ct->tuplehash[!dir].tuple.src.u3,
 				    ct->tuplehash[!dir].tuple.src.u.tcp.port);
@@ -881,7 +901,7 @@ static int process_setup(struct sk_buff *skb, struct nf_conn *ct,
 		pr_debug("nf_ct_q931: set sourceCallSignalAddress %pI6:%hu->%pI6:%hu\n",
 			 &addr, ntohs(port), &ct->tuplehash[!dir].tuple.dst.u3,
 			 ntohs(ct->tuplehash[!dir].tuple.dst.u.tcp.port));
-		ret = set_h225_addr(skb, data, dataoff,
+		ret = set_h225_addr(skb, protoff, data, dataoff,
 				    &setup->sourceCallSignalAddress,
 				    &ct->tuplehash[!dir].tuple.dst.u3,
 				    ct->tuplehash[!dir].tuple.dst.u.tcp.port);
@@ -891,7 +911,8 @@ static int process_setup(struct sk_buff *skb, struct nf_conn *ct,
 
 	if (setup->options & eSetup_UUIE_fastStart) {
 		for (i = 0; i < setup->fastStart.count; i++) {
-			ret = process_olc(skb, ct, ctinfo, data, dataoff,
+			ret = process_olc(skb, ct, ctinfo,
+					  protoff, data, dataoff,
 					  &setup->fastStart.item[i]);
 			if (ret < 0)
 				return -1;
@@ -905,6 +926,7 @@ static int process_setup(struct sk_buff *skb, struct nf_conn *ct,
 static int process_callproceeding(struct sk_buff *skb,
 				  struct nf_conn *ct,
 				  enum ip_conntrack_info ctinfo,
+				  unsigned int protoff,
 				  unsigned char **data, int dataoff,
 				  CallProceeding_UUIE *callproc)
 {
@@ -914,7 +936,7 @@ static int process_callproceeding(struct sk_buff *skb,
 	pr_debug("nf_ct_q931: CallProceeding\n");
 
 	if (callproc->options & eCallProceeding_UUIE_h245Address) {
-		ret = expect_h245(skb, ct, ctinfo, data, dataoff,
+		ret = expect_h245(skb, ct, ctinfo, protoff, data, dataoff,
 				  &callproc->h245Address);
 		if (ret < 0)
 			return -1;
@@ -922,7 +944,8 @@ static int process_callproceeding(struct sk_buff *skb,
 
 	if (callproc->options & eCallProceeding_UUIE_fastStart) {
 		for (i = 0; i < callproc->fastStart.count; i++) {
-			ret = process_olc(skb, ct, ctinfo, data, dataoff,
+			ret = process_olc(skb, ct, ctinfo,
+					  protoff, data, dataoff,
 					  &callproc->fastStart.item[i]);
 			if (ret < 0)
 				return -1;
@@ -935,6 +958,7 @@ static int process_callproceeding(struct sk_buff *skb,
 /****************************************************************************/
 static int process_connect(struct sk_buff *skb, struct nf_conn *ct,
 			   enum ip_conntrack_info ctinfo,
+			   unsigned int protoff,
 			   unsigned char **data, int dataoff,
 			   Connect_UUIE *connect)
 {
@@ -944,7 +968,7 @@ static int process_connect(struct sk_buff *skb, struct nf_conn *ct,
 	pr_debug("nf_ct_q931: Connect\n");
 
 	if (connect->options & eConnect_UUIE_h245Address) {
-		ret = expect_h245(skb, ct, ctinfo, data, dataoff,
+		ret = expect_h245(skb, ct, ctinfo, protoff, data, dataoff,
 				  &connect->h245Address);
 		if (ret < 0)
 			return -1;
@@ -952,7 +976,8 @@ static int process_connect(struct sk_buff *skb, struct nf_conn *ct,
 
 	if (connect->options & eConnect_UUIE_fastStart) {
 		for (i = 0; i < connect->fastStart.count; i++) {
-			ret = process_olc(skb, ct, ctinfo, data, dataoff,
+			ret = process_olc(skb, ct, ctinfo,
+					  protoff, data, dataoff,
 					  &connect->fastStart.item[i]);
 			if (ret < 0)
 				return -1;
@@ -965,6 +990,7 @@ static int process_connect(struct sk_buff *skb, struct nf_conn *ct,
 /****************************************************************************/
 static int process_alerting(struct sk_buff *skb, struct nf_conn *ct,
 			    enum ip_conntrack_info ctinfo,
+			    unsigned int protoff,
 			    unsigned char **data, int dataoff,
 			    Alerting_UUIE *alert)
 {
@@ -974,7 +1000,7 @@ static int process_alerting(struct sk_buff *skb, struct nf_conn *ct,
 	pr_debug("nf_ct_q931: Alerting\n");
 
 	if (alert->options & eAlerting_UUIE_h245Address) {
-		ret = expect_h245(skb, ct, ctinfo, data, dataoff,
+		ret = expect_h245(skb, ct, ctinfo, protoff, data, dataoff,
 				  &alert->h245Address);
 		if (ret < 0)
 			return -1;
@@ -982,7 +1008,8 @@ static int process_alerting(struct sk_buff *skb, struct nf_conn *ct,
 
 	if (alert->options & eAlerting_UUIE_fastStart) {
 		for (i = 0; i < alert->fastStart.count; i++) {
-			ret = process_olc(skb, ct, ctinfo, data, dataoff,
+			ret = process_olc(skb, ct, ctinfo,
+					  protoff, data, dataoff,
 					  &alert->fastStart.item[i]);
 			if (ret < 0)
 				return -1;
@@ -995,6 +1022,7 @@ static int process_alerting(struct sk_buff *skb, struct nf_conn *ct,
 /****************************************************************************/
 static int process_facility(struct sk_buff *skb, struct nf_conn *ct,
 			    enum ip_conntrack_info ctinfo,
+			    unsigned int protoff,
 			    unsigned char **data, int dataoff,
 			    Facility_UUIE *facility)
 {
@@ -1005,15 +1033,15 @@ static int process_facility(struct sk_buff *skb, struct nf_conn *ct,
 
 	if (facility->reason.choice == eFacilityReason_callForwarded) {
 		if (facility->options & eFacility_UUIE_alternativeAddress)
-			return expect_callforwarding(skb, ct, ctinfo, data,
-						     dataoff,
+			return expect_callforwarding(skb, ct, ctinfo,
+						     protoff, data, dataoff,
 						     &facility->
 						     alternativeAddress);
 		return 0;
 	}
 
 	if (facility->options & eFacility_UUIE_h245Address) {
-		ret = expect_h245(skb, ct, ctinfo, data, dataoff,
+		ret = expect_h245(skb, ct, ctinfo, protoff, data, dataoff,
 				  &facility->h245Address);
 		if (ret < 0)
 			return -1;
@@ -1021,7 +1049,8 @@ static int process_facility(struct sk_buff *skb, struct nf_conn *ct,
 
 	if (facility->options & eFacility_UUIE_fastStart) {
 		for (i = 0; i < facility->fastStart.count; i++) {
-			ret = process_olc(skb, ct, ctinfo, data, dataoff,
+			ret = process_olc(skb, ct, ctinfo,
+					  protoff, data, dataoff,
 					  &facility->fastStart.item[i]);
 			if (ret < 0)
 				return -1;
@@ -1034,6 +1063,7 @@ static int process_facility(struct sk_buff *skb, struct nf_conn *ct,
 /****************************************************************************/
 static int process_progress(struct sk_buff *skb, struct nf_conn *ct,
 			    enum ip_conntrack_info ctinfo,
+			    unsigned int protoff,
 			    unsigned char **data, int dataoff,
 			    Progress_UUIE *progress)
 {
@@ -1043,7 +1073,7 @@ static int process_progress(struct sk_buff *skb, struct nf_conn *ct,
 	pr_debug("nf_ct_q931: Progress\n");
 
 	if (progress->options & eProgress_UUIE_h245Address) {
-		ret = expect_h245(skb, ct, ctinfo, data, dataoff,
+		ret = expect_h245(skb, ct, ctinfo, protoff, data, dataoff,
 				  &progress->h245Address);
 		if (ret < 0)
 			return -1;
@@ -1051,7 +1081,8 @@ static int process_progress(struct sk_buff *skb, struct nf_conn *ct,
 
 	if (progress->options & eProgress_UUIE_fastStart) {
 		for (i = 0; i < progress->fastStart.count; i++) {
-			ret = process_olc(skb, ct, ctinfo, data, dataoff,
+			ret = process_olc(skb, ct, ctinfo,
+					  protoff, data, dataoff,
 					  &progress->fastStart.item[i]);
 			if (ret < 0)
 				return -1;
@@ -1064,7 +1095,8 @@ static int process_progress(struct sk_buff *skb, struct nf_conn *ct,
 /****************************************************************************/
 static int process_q931(struct sk_buff *skb, struct nf_conn *ct,
 			enum ip_conntrack_info ctinfo,
-			unsigned char **data, int dataoff, Q931 *q931)
+			unsigned int protoff, unsigned char **data, int dataoff,
+			Q931 *q931)
 {
 	H323_UU_PDU *pdu = &q931->UUIE.h323_uu_pdu;
 	int i;
@@ -1072,28 +1104,29 @@ static int process_q931(struct sk_buff *skb, struct nf_conn *ct,
 
 	switch (pdu->h323_message_body.choice) {
 	case eH323_UU_PDU_h323_message_body_setup:
-		ret = process_setup(skb, ct, ctinfo, data, dataoff,
+		ret = process_setup(skb, ct, ctinfo, protoff, data, dataoff,
 				    &pdu->h323_message_body.setup);
 		break;
 	case eH323_UU_PDU_h323_message_body_callProceeding:
-		ret = process_callproceeding(skb, ct, ctinfo, data, dataoff,
+		ret = process_callproceeding(skb, ct, ctinfo,
+					     protoff, data, dataoff,
 					     &pdu->h323_message_body.
 					     callProceeding);
 		break;
 	case eH323_UU_PDU_h323_message_body_connect:
-		ret = process_connect(skb, ct, ctinfo, data, dataoff,
+		ret = process_connect(skb, ct, ctinfo, protoff, data, dataoff,
 				      &pdu->h323_message_body.connect);
 		break;
 	case eH323_UU_PDU_h323_message_body_alerting:
-		ret = process_alerting(skb, ct, ctinfo, data, dataoff,
+		ret = process_alerting(skb, ct, ctinfo, protoff, data, dataoff,
 				       &pdu->h323_message_body.alerting);
 		break;
 	case eH323_UU_PDU_h323_message_body_facility:
-		ret = process_facility(skb, ct, ctinfo, data, dataoff,
+		ret = process_facility(skb, ct, ctinfo, protoff, data, dataoff,
 				       &pdu->h323_message_body.facility);
 		break;
 	case eH323_UU_PDU_h323_message_body_progress:
-		ret = process_progress(skb, ct, ctinfo, data, dataoff,
+		ret = process_progress(skb, ct, ctinfo, protoff, data, dataoff,
 				       &pdu->h323_message_body.progress);
 		break;
 	default:
@@ -1107,7 +1140,8 @@ static int process_q931(struct sk_buff *skb, struct nf_conn *ct,
 
 	if (pdu->options & eH323_UU_PDU_h245Control) {
 		for (i = 0; i < pdu->h245Control.count; i++) {
-			ret = process_h245(skb, ct, ctinfo, data, dataoff,
+			ret = process_h245(skb, ct, ctinfo,
+					   protoff, data, dataoff,
 					   &pdu->h245Control.item[i]);
 			if (ret < 0)
 				return -1;
@@ -1152,7 +1186,8 @@ static int q931_help(struct sk_buff *skb, unsigned int protoff,
 		}
 
 		/* Process Q.931 signal */
-		if (process_q931(skb, ct, ctinfo, &data, dataoff, &q931) < 0)
+		if (process_q931(skb, ct, ctinfo, protoff,
+				 &data, dataoff, &q931) < 0)
 			goto drop;
 	}
 
@@ -1249,7 +1284,7 @@ static int set_expect_timeout(struct nf_conntrack_expect *exp,
 /****************************************************************************/
 static int expect_q931(struct sk_buff *skb, struct nf_conn *ct,
 		       enum ip_conntrack_info ctinfo,
-		       unsigned char **data,
+		       unsigned int protoff, unsigned char **data,
 		       TransportAddress *taddr, int count)
 {
 	struct nf_ct_h323_master *info = nfct_help_data(ct);
@@ -1286,7 +1321,8 @@ static int expect_q931(struct sk_buff *skb, struct nf_conn *ct,
 	nat_q931 = rcu_dereference(nat_q931_hook);
 	if (nat_q931 && nf_ct_l3num(ct) == NFPROTO_IPV4 &&
 	    ct->status & IPS_NAT_MASK) {	/* Need NAT */
-		ret = nat_q931(skb, ct, ctinfo, data, taddr, i, port, exp);
+		ret = nat_q931(skb, ct, ctinfo, protoff, data,
+			       taddr, i, port, exp);
 	} else {		/* Conntrack only */
 		if (nf_ct_expect_related(exp) == 0) {
 			pr_debug("nf_ct_ras: expect Q.931 ");
@@ -1306,6 +1342,7 @@ static int expect_q931(struct sk_buff *skb, struct nf_conn *ct,
 /****************************************************************************/
 static int process_grq(struct sk_buff *skb, struct nf_conn *ct,
 		       enum ip_conntrack_info ctinfo,
+		       unsigned int protoff,
 		       unsigned char **data, GatekeeperRequest *grq)
 {
 	typeof(set_ras_addr_hook) set_ras_addr;
@@ -1315,7 +1352,7 @@ static int process_grq(struct sk_buff *skb, struct nf_conn *ct,
 	set_ras_addr = rcu_dereference(set_ras_addr_hook);
 	if (set_ras_addr && nf_ct_l3num(ct) == NFPROTO_IPV4 &&
 	    ct->status & IPS_NAT_MASK)	/* NATed */
-		return set_ras_addr(skb, ct, ctinfo, data,
+		return set_ras_addr(skb, ct, ctinfo, protoff, data,
 				    &grq->rasAddress, 1);
 	return 0;
 }
@@ -1323,6 +1360,7 @@ static int process_grq(struct sk_buff *skb, struct nf_conn *ct,
 /****************************************************************************/
 static int process_gcf(struct sk_buff *skb, struct nf_conn *ct,
 		       enum ip_conntrack_info ctinfo,
+		       unsigned int protoff,
 		       unsigned char **data, GatekeeperConfirm *gcf)
 {
 	int dir = CTINFO2DIR(ctinfo);
@@ -1367,6 +1405,7 @@ static int process_gcf(struct sk_buff *skb, struct nf_conn *ct,
 /****************************************************************************/
 static int process_rrq(struct sk_buff *skb, struct nf_conn *ct,
 		       enum ip_conntrack_info ctinfo,
+		       unsigned int protoff,
 		       unsigned char **data, RegistrationRequest *rrq)
 {
 	struct nf_ct_h323_master *info = nfct_help_data(ct);
@@ -1375,7 +1414,7 @@ static int process_rrq(struct sk_buff *skb, struct nf_conn *ct,
 
 	pr_debug("nf_ct_ras: RRQ\n");
 
-	ret = expect_q931(skb, ct, ctinfo, data,
+	ret = expect_q931(skb, ct, ctinfo, protoff, data,
 			  rrq->callSignalAddress.item,
 			  rrq->callSignalAddress.count);
 	if (ret < 0)
@@ -1384,7 +1423,7 @@ static int process_rrq(struct sk_buff *skb, struct nf_conn *ct,
 	set_ras_addr = rcu_dereference(set_ras_addr_hook);
 	if (set_ras_addr && nf_ct_l3num(ct) == NFPROTO_IPV4 &&
 	    ct->status & IPS_NAT_MASK) {
-		ret = set_ras_addr(skb, ct, ctinfo, data,
+		ret = set_ras_addr(skb, ct, ctinfo, protoff, data,
 				   rrq->rasAddress.item,
 				   rrq->rasAddress.count);
 		if (ret < 0)
@@ -1403,6 +1442,7 @@ static int process_rrq(struct sk_buff *skb, struct nf_conn *ct,
 /****************************************************************************/
 static int process_rcf(struct sk_buff *skb, struct nf_conn *ct,
 		       enum ip_conntrack_info ctinfo,
+		       unsigned int protoff,
 		       unsigned char **data, RegistrationConfirm *rcf)
 {
 	struct nf_ct_h323_master *info = nfct_help_data(ct);
@@ -1416,7 +1456,7 @@ static int process_rcf(struct sk_buff *skb, struct nf_conn *ct,
 	set_sig_addr = rcu_dereference(set_sig_addr_hook);
 	if (set_sig_addr && nf_ct_l3num(ct) == NFPROTO_IPV4 &&
 	    ct->status & IPS_NAT_MASK) {
-		ret = set_sig_addr(skb, ct, ctinfo, data,
+		ret = set_sig_addr(skb, ct, ctinfo, protoff, data,
 					rcf->callSignalAddress.item,
 					rcf->callSignalAddress.count);
 		if (ret < 0)
@@ -1453,6 +1493,7 @@ static int process_rcf(struct sk_buff *skb, struct nf_conn *ct,
 /****************************************************************************/
 static int process_urq(struct sk_buff *skb, struct nf_conn *ct,
 		       enum ip_conntrack_info ctinfo,
+		       unsigned int protoff,
 		       unsigned char **data, UnregistrationRequest *urq)
 {
 	struct nf_ct_h323_master *info = nfct_help_data(ct);
@@ -1465,7 +1506,7 @@ static int process_urq(struct sk_buff *skb, struct nf_conn *ct,
 	set_sig_addr = rcu_dereference(set_sig_addr_hook);
 	if (set_sig_addr && nf_ct_l3num(ct) == NFPROTO_IPV4 &&
 	    ct->status & IPS_NAT_MASK) {
-		ret = set_sig_addr(skb, ct, ctinfo, data,
+		ret = set_sig_addr(skb, ct, ctinfo, protoff, data,
 				   urq->callSignalAddress.item,
 				   urq->callSignalAddress.count);
 		if (ret < 0)
@@ -1486,6 +1527,7 @@ static int process_urq(struct sk_buff *skb, struct nf_conn *ct,
 /****************************************************************************/
 static int process_arq(struct sk_buff *skb, struct nf_conn *ct,
 		       enum ip_conntrack_info ctinfo,
+		       unsigned int protoff,
 		       unsigned char **data, AdmissionRequest *arq)
 {
 	const struct nf_ct_h323_master *info = nfct_help_data(ct);
@@ -1505,7 +1547,7 @@ static int process_arq(struct sk_buff *skb, struct nf_conn *ct,
 	    nf_ct_l3num(ct) == NFPROTO_IPV4 &&
 	    set_h225_addr && ct->status & IPS_NAT_MASK) {
 		/* Answering ARQ */
-		return set_h225_addr(skb, data, 0,
+		return set_h225_addr(skb, protoff, data, 0,
 				     &arq->destCallSignalAddress,
 				     &ct->tuplehash[!dir].tuple.dst.u3,
 				     info->sig_port[!dir]);
@@ -1518,7 +1560,7 @@ static int process_arq(struct sk_buff *skb, struct nf_conn *ct,
 	    set_h225_addr && nf_ct_l3num(ct) == NFPROTO_IPV4 &&
 	    ct->status & IPS_NAT_MASK) {
 		/* Calling ARQ */
-		return set_h225_addr(skb, data, 0,
+		return set_h225_addr(skb, protoff, data, 0,
 				     &arq->srcCallSignalAddress,
 				     &ct->tuplehash[!dir].tuple.dst.u3,
 				     port);
@@ -1530,6 +1572,7 @@ static int process_arq(struct sk_buff *skb, struct nf_conn *ct,
 /****************************************************************************/
 static int process_acf(struct sk_buff *skb, struct nf_conn *ct,
 		       enum ip_conntrack_info ctinfo,
+		       unsigned int protoff,
 		       unsigned char **data, AdmissionConfirm *acf)
 {
 	int dir = CTINFO2DIR(ctinfo);
@@ -1550,7 +1593,7 @@ static int process_acf(struct sk_buff *skb, struct nf_conn *ct,
 		set_sig_addr = rcu_dereference(set_sig_addr_hook);
 		if (set_sig_addr && nf_ct_l3num(ct) == NFPROTO_IPV4 &&
 		    ct->status & IPS_NAT_MASK)
-			return set_sig_addr(skb, ct, ctinfo, data,
+			return set_sig_addr(skb, ct, ctinfo, protoff, data,
 					    &acf->destCallSignalAddress, 1);
 		return 0;
 	}
@@ -1578,6 +1621,7 @@ static int process_acf(struct sk_buff *skb, struct nf_conn *ct,
 /****************************************************************************/
 static int process_lrq(struct sk_buff *skb, struct nf_conn *ct,
 		       enum ip_conntrack_info ctinfo,
+		       unsigned int protoff,
 		       unsigned char **data, LocationRequest *lrq)
 {
 	typeof(set_ras_addr_hook) set_ras_addr;
@@ -1587,7 +1631,7 @@ static int process_lrq(struct sk_buff *skb, struct nf_conn *ct,
 	set_ras_addr = rcu_dereference(set_ras_addr_hook);
 	if (set_ras_addr && nf_ct_l3num(ct) == NFPROTO_IPV4 &&
 	    ct->status & IPS_NAT_MASK)
-		return set_ras_addr(skb, ct, ctinfo, data,
+		return set_ras_addr(skb, ct, ctinfo, protoff, data,
 				    &lrq->replyAddress, 1);
 	return 0;
 }
@@ -1595,6 +1639,7 @@ static int process_lrq(struct sk_buff *skb, struct nf_conn *ct,
 /****************************************************************************/
 static int process_lcf(struct sk_buff *skb, struct nf_conn *ct,
 		       enum ip_conntrack_info ctinfo,
+		       unsigned int protoff,
 		       unsigned char **data, LocationConfirm *lcf)
 {
 	int dir = CTINFO2DIR(ctinfo);
@@ -1634,6 +1679,7 @@ static int process_lcf(struct sk_buff *skb, struct nf_conn *ct,
 /****************************************************************************/
 static int process_irr(struct sk_buff *skb, struct nf_conn *ct,
 		       enum ip_conntrack_info ctinfo,
+		       unsigned int protoff,
 		       unsigned char **data, InfoRequestResponse *irr)
 {
 	int ret;
@@ -1645,7 +1691,7 @@ static int process_irr(struct sk_buff *skb, struct nf_conn *ct,
 	set_ras_addr = rcu_dereference(set_ras_addr_hook);
 	if (set_ras_addr && nf_ct_l3num(ct) == NFPROTO_IPV4 &&
 	    ct->status & IPS_NAT_MASK) {
-		ret = set_ras_addr(skb, ct, ctinfo, data,
+		ret = set_ras_addr(skb, ct, ctinfo, protoff, data,
 				   &irr->rasAddress, 1);
 		if (ret < 0)
 			return -1;
@@ -1654,7 +1700,7 @@ static int process_irr(struct sk_buff *skb, struct nf_conn *ct,
 	set_sig_addr = rcu_dereference(set_sig_addr_hook);
 	if (set_sig_addr && nf_ct_l3num(ct) == NFPROTO_IPV4 &&
 	    ct->status & IPS_NAT_MASK) {
-		ret = set_sig_addr(skb, ct, ctinfo, data,
+		ret = set_sig_addr(skb, ct, ctinfo, protoff, data,
 					irr->callSignalAddress.item,
 					irr->callSignalAddress.count);
 		if (ret < 0)
@@ -1667,38 +1713,39 @@ static int process_irr(struct sk_buff *skb, struct nf_conn *ct,
 /****************************************************************************/
 static int process_ras(struct sk_buff *skb, struct nf_conn *ct,
 		       enum ip_conntrack_info ctinfo,
+		       unsigned int protoff,
 		       unsigned char **data, RasMessage *ras)
 {
 	switch (ras->choice) {
 	case eRasMessage_gatekeeperRequest:
-		return process_grq(skb, ct, ctinfo, data,
+		return process_grq(skb, ct, ctinfo, protoff, data,
 				   &ras->gatekeeperRequest);
 	case eRasMessage_gatekeeperConfirm:
-		return process_gcf(skb, ct, ctinfo, data,
+		return process_gcf(skb, ct, ctinfo, protoff, data,
 				   &ras->gatekeeperConfirm);
 	case eRasMessage_registrationRequest:
-		return process_rrq(skb, ct, ctinfo, data,
+		return process_rrq(skb, ct, ctinfo, protoff, data,
 				   &ras->registrationRequest);
 	case eRasMessage_registrationConfirm:
-		return process_rcf(skb, ct, ctinfo, data,
+		return process_rcf(skb, ct, ctinfo, protoff, data,
 				   &ras->registrationConfirm);
 	case eRasMessage_unregistrationRequest:
-		return process_urq(skb, ct, ctinfo, data,
+		return process_urq(skb, ct, ctinfo, protoff, data,
 				   &ras->unregistrationRequest);
 	case eRasMessage_admissionRequest:
-		return process_arq(skb, ct, ctinfo, data,
+		return process_arq(skb, ct, ctinfo, protoff, data,
 				   &ras->admissionRequest);
 	case eRasMessage_admissionConfirm:
-		return process_acf(skb, ct, ctinfo, data,
+		return process_acf(skb, ct, ctinfo, protoff, data,
 				   &ras->admissionConfirm);
 	case eRasMessage_locationRequest:
-		return process_lrq(skb, ct, ctinfo, data,
+		return process_lrq(skb, ct, ctinfo, protoff, data,
 				   &ras->locationRequest);
 	case eRasMessage_locationConfirm:
-		return process_lcf(skb, ct, ctinfo, data,
+		return process_lcf(skb, ct, ctinfo, protoff, data,
 				   &ras->locationConfirm);
 	case eRasMessage_infoRequestResponse:
-		return process_irr(skb, ct, ctinfo, data,
+		return process_irr(skb, ct, ctinfo, protoff, data,
 				   &ras->infoRequestResponse);
 	default:
 		pr_debug("nf_ct_ras: RAS message %d\n", ras->choice);
@@ -1738,7 +1785,7 @@ static int ras_help(struct sk_buff *skb, unsigned int protoff,
 	}
 
 	/* Process RAS message */
-	if (process_ras(skb, ct, ctinfo, &data, &ras) < 0)
+	if (process_ras(skb, ct, ctinfo, protoff, &data, &ras) < 0)
 		goto drop;
 
       accept:
diff --git a/net/netfilter/nf_conntrack_irc.c b/net/netfilter/nf_conntrack_irc.c
index e06dc2fab19f..95d097cdb202 100644
--- a/net/netfilter/nf_conntrack_irc.c
+++ b/net/netfilter/nf_conntrack_irc.c
@@ -33,6 +33,7 @@ static DEFINE_SPINLOCK(irc_buffer_lock);
 
 unsigned int (*nf_nat_irc_hook)(struct sk_buff *skb,
 				enum ip_conntrack_info ctinfo,
+				unsigned int protoff,
 				unsigned int matchoff,
 				unsigned int matchlen,
 				struct nf_conntrack_expect *exp) __read_mostly;
@@ -206,7 +207,7 @@ static int help(struct sk_buff *skb, unsigned int protoff,
 			nf_nat_irc = rcu_dereference(nf_nat_irc_hook);
 			if (nf_nat_irc && nf_ct_l3num(ct) == NFPROTO_IPV4 &&
 			    ct->status & IPS_NAT_MASK)
-				ret = nf_nat_irc(skb, ctinfo,
+				ret = nf_nat_irc(skb, ctinfo, protoff,
 						 addr_beg_p - ib_ptr,
 						 addr_end_p - addr_beg_p,
 						 exp);
diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c
index 6fed9ec35248..cc7669ef0b95 100644
--- a/net/netfilter/nf_conntrack_pptp.c
+++ b/net/netfilter/nf_conntrack_pptp.c
@@ -45,14 +45,14 @@ static DEFINE_SPINLOCK(nf_pptp_lock);
 int
 (*nf_nat_pptp_hook_outbound)(struct sk_buff *skb,
 			     struct nf_conn *ct, enum ip_conntrack_info ctinfo,
-			     struct PptpControlHeader *ctlh,
+			     unsigned int protoff, struct PptpControlHeader *ctlh,
 			     union pptp_ctrl_union *pptpReq) __read_mostly;
 EXPORT_SYMBOL_GPL(nf_nat_pptp_hook_outbound);
 
 int
 (*nf_nat_pptp_hook_inbound)(struct sk_buff *skb,
 			    struct nf_conn *ct, enum ip_conntrack_info ctinfo,
-			    struct PptpControlHeader *ctlh,
+			    unsigned int protoff, struct PptpControlHeader *ctlh,
 			    union pptp_ctrl_union *pptpReq) __read_mostly;
 EXPORT_SYMBOL_GPL(nf_nat_pptp_hook_inbound);
 
@@ -262,7 +262,7 @@ out_unexpect_orig:
 }
 
 static inline int
-pptp_inbound_pkt(struct sk_buff *skb,
+pptp_inbound_pkt(struct sk_buff *skb, unsigned int protoff,
 		 struct PptpControlHeader *ctlh,
 		 union pptp_ctrl_union *pptpReq,
 		 unsigned int reqlen,
@@ -376,7 +376,8 @@ pptp_inbound_pkt(struct sk_buff *skb,
 
 	nf_nat_pptp_inbound = rcu_dereference(nf_nat_pptp_hook_inbound);
 	if (nf_nat_pptp_inbound && ct->status & IPS_NAT_MASK)
-		return nf_nat_pptp_inbound(skb, ct, ctinfo, ctlh, pptpReq);
+		return nf_nat_pptp_inbound(skb, ct, ctinfo,
+					   protoff, ctlh, pptpReq);
 	return NF_ACCEPT;
 
 invalid:
@@ -389,7 +390,7 @@ invalid:
 }
 
 static inline int
-pptp_outbound_pkt(struct sk_buff *skb,
+pptp_outbound_pkt(struct sk_buff *skb, unsigned int protoff,
 		  struct PptpControlHeader *ctlh,
 		  union pptp_ctrl_union *pptpReq,
 		  unsigned int reqlen,
@@ -471,7 +472,8 @@ pptp_outbound_pkt(struct sk_buff *skb,
 
 	nf_nat_pptp_outbound = rcu_dereference(nf_nat_pptp_hook_outbound);
 	if (nf_nat_pptp_outbound && ct->status & IPS_NAT_MASK)
-		return nf_nat_pptp_outbound(skb, ct, ctinfo, ctlh, pptpReq);
+		return nf_nat_pptp_outbound(skb, ct, ctinfo,
+					    protoff, ctlh, pptpReq);
 	return NF_ACCEPT;
 
 invalid:
@@ -570,11 +572,11 @@ conntrack_pptp_help(struct sk_buff *skb, unsigned int protoff,
 	 * established from PNS->PAC.  However, RFC makes no guarantee */
 	if (dir == IP_CT_DIR_ORIGINAL)
 		/* client -> server (PNS -> PAC) */
-		ret = pptp_outbound_pkt(skb, ctlh, pptpReq, reqlen, ct,
+		ret = pptp_outbound_pkt(skb, protoff, ctlh, pptpReq, reqlen, ct,
 					ctinfo);
 	else
 		/* server -> client (PAC -> PNS) */
-		ret = pptp_inbound_pkt(skb, ctlh, pptpReq, reqlen, ct,
+		ret = pptp_inbound_pkt(skb, protoff, ctlh, pptpReq, reqlen, ct,
 				       ctinfo);
 	pr_debug("sstate: %d->%d, cstate: %d->%d\n",
 		 oldsstate, info->sstate, oldcstate, info->cstate);
diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c
index d08e0baf4640..590f0abaab8c 100644
--- a/net/netfilter/nf_conntrack_sip.c
+++ b/net/netfilter/nf_conntrack_sip.c
@@ -52,8 +52,8 @@ module_param(sip_direct_media, int, 0600);
 MODULE_PARM_DESC(sip_direct_media, "Expect Media streams between signalling "
 				   "endpoints only (default 1)");
 
-unsigned int (*nf_nat_sip_hook)(struct sk_buff *skb, unsigned int dataoff,
-				const char **dptr,
+unsigned int (*nf_nat_sip_hook)(struct sk_buff *skb, unsigned int protoff,
+				unsigned int dataoff, const char **dptr,
 				unsigned int *datalen) __read_mostly;
 EXPORT_SYMBOL_GPL(nf_nat_sip_hook);
 
@@ -61,6 +61,7 @@ void (*nf_nat_sip_seq_adjust_hook)(struct sk_buff *skb, s16 off) __read_mostly;
 EXPORT_SYMBOL_GPL(nf_nat_sip_seq_adjust_hook);
 
 unsigned int (*nf_nat_sip_expect_hook)(struct sk_buff *skb,
+				       unsigned int protoff,
 				       unsigned int dataoff,
 				       const char **dptr,
 				       unsigned int *datalen,
@@ -69,7 +70,8 @@ unsigned int (*nf_nat_sip_expect_hook)(struct sk_buff *skb,
 				       unsigned int matchlen) __read_mostly;
 EXPORT_SYMBOL_GPL(nf_nat_sip_expect_hook);
 
-unsigned int (*nf_nat_sdp_addr_hook)(struct sk_buff *skb, unsigned int dataoff,
+unsigned int (*nf_nat_sdp_addr_hook)(struct sk_buff *skb, unsigned int protoff,
+				     unsigned int dataoff,
 				     const char **dptr,
 				     unsigned int *datalen,
 				     unsigned int sdpoff,
@@ -79,7 +81,8 @@ unsigned int (*nf_nat_sdp_addr_hook)(struct sk_buff *skb, unsigned int dataoff,
 				     __read_mostly;
 EXPORT_SYMBOL_GPL(nf_nat_sdp_addr_hook);
 
-unsigned int (*nf_nat_sdp_port_hook)(struct sk_buff *skb, unsigned int dataoff,
+unsigned int (*nf_nat_sdp_port_hook)(struct sk_buff *skb, unsigned int protoff,
+				     unsigned int dataoff,
 				     const char **dptr,
 				     unsigned int *datalen,
 				     unsigned int matchoff,
@@ -88,6 +91,7 @@ unsigned int (*nf_nat_sdp_port_hook)(struct sk_buff *skb, unsigned int dataoff,
 EXPORT_SYMBOL_GPL(nf_nat_sdp_port_hook);
 
 unsigned int (*nf_nat_sdp_session_hook)(struct sk_buff *skb,
+					unsigned int protoff,
 					unsigned int dataoff,
 					const char **dptr,
 					unsigned int *datalen,
@@ -96,7 +100,8 @@ unsigned int (*nf_nat_sdp_session_hook)(struct sk_buff *skb,
 					__read_mostly;
 EXPORT_SYMBOL_GPL(nf_nat_sdp_session_hook);
 
-unsigned int (*nf_nat_sdp_media_hook)(struct sk_buff *skb, unsigned int dataoff,
+unsigned int (*nf_nat_sdp_media_hook)(struct sk_buff *skb, unsigned int protoff,
+				      unsigned int dataoff,
 				      const char **dptr,
 				      unsigned int *datalen,
 				      struct nf_conntrack_expect *rtp_exp,
@@ -883,7 +888,8 @@ static void flush_expectations(struct nf_conn *ct, bool media)
 	spin_unlock_bh(&nf_conntrack_lock);
 }
 
-static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int dataoff,
+static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int protoff,
+				 unsigned int dataoff,
 				 const char **dptr, unsigned int *datalen,
 				 union nf_inet_addr *daddr, __be16 port,
 				 enum sip_expectation_classes class,
@@ -960,7 +966,7 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int dataoff,
 	if (direct_rtp) {
 		nf_nat_sdp_port = rcu_dereference(nf_nat_sdp_port_hook);
 		if (nf_nat_sdp_port &&
-		    !nf_nat_sdp_port(skb, dataoff, dptr, datalen,
+		    !nf_nat_sdp_port(skb, protoff, dataoff, dptr, datalen,
 				     mediaoff, medialen, ntohs(rtp_port)))
 			goto err1;
 	}
@@ -983,7 +989,7 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int dataoff,
 	nf_nat_sdp_media = rcu_dereference(nf_nat_sdp_media_hook);
 	if (nf_nat_sdp_media && nf_ct_l3num(ct) == NFPROTO_IPV4 &&
 	    ct->status & IPS_NAT_MASK && !direct_rtp)
-		ret = nf_nat_sdp_media(skb, dataoff, dptr, datalen,
+		ret = nf_nat_sdp_media(skb, protoff, dataoff, dptr, datalen,
 				       rtp_exp, rtcp_exp,
 				       mediaoff, medialen, daddr);
 	else {
@@ -1024,7 +1030,8 @@ static const struct sdp_media_type *sdp_media_type(const char *dptr,
 	return NULL;
 }
 
-static int process_sdp(struct sk_buff *skb, unsigned int dataoff,
+static int process_sdp(struct sk_buff *skb, unsigned int protoff,
+		       unsigned int dataoff,
 		       const char **dptr, unsigned int *datalen,
 		       unsigned int cseq)
 {
@@ -1098,7 +1105,8 @@ static int process_sdp(struct sk_buff *skb, unsigned int dataoff,
 		else
 			return NF_DROP;
 
-		ret = set_expected_rtp_rtcp(skb, dataoff, dptr, datalen,
+		ret = set_expected_rtp_rtcp(skb, protoff, dataoff,
+					    dptr, datalen,
 					    &rtp_addr, htons(port), t->class,
 					    mediaoff, medialen);
 		if (ret != NF_ACCEPT)
@@ -1107,7 +1115,8 @@ static int process_sdp(struct sk_buff *skb, unsigned int dataoff,
 		/* Update media connection address if present */
 		if (maddr_len && nf_nat_sdp_addr &&
 		    nf_ct_l3num(ct) == NFPROTO_IPV4 && ct->status & IPS_NAT_MASK) {
-			ret = nf_nat_sdp_addr(skb, dataoff, dptr, datalen,
+			ret = nf_nat_sdp_addr(skb, protoff, dataoff,
+					      dptr, datalen,
 					      mediaoff, c_hdr, SDP_HDR_MEDIA,
 					      &rtp_addr);
 			if (ret != NF_ACCEPT)
@@ -1120,12 +1129,13 @@ static int process_sdp(struct sk_buff *skb, unsigned int dataoff,
 	nf_nat_sdp_session = rcu_dereference(nf_nat_sdp_session_hook);
 	if (nf_nat_sdp_session && nf_ct_l3num(ct) == NFPROTO_IPV4 &&
 	    ct->status & IPS_NAT_MASK)
-		ret = nf_nat_sdp_session(skb, dataoff, dptr, datalen, sdpoff,
-					 &rtp_addr);
+		ret = nf_nat_sdp_session(skb, protoff, dataoff,
+					 dptr, datalen, sdpoff, &rtp_addr);
 
 	return ret;
 }
-static int process_invite_response(struct sk_buff *skb, unsigned int dataoff,
+static int process_invite_response(struct sk_buff *skb, unsigned int protoff,
+				   unsigned int dataoff,
 				   const char **dptr, unsigned int *datalen,
 				   unsigned int cseq, unsigned int code)
 {
@@ -1135,13 +1145,14 @@ static int process_invite_response(struct sk_buff *skb, unsigned int dataoff,
 
 	if ((code >= 100 && code <= 199) ||
 	    (code >= 200 && code <= 299))
-		return process_sdp(skb, dataoff, dptr, datalen, cseq);
+		return process_sdp(skb, protoff, dataoff, dptr, datalen, cseq);
 	else if (ct_sip_info->invite_cseq == cseq)
 		flush_expectations(ct, true);
 	return NF_ACCEPT;
 }
 
-static int process_update_response(struct sk_buff *skb, unsigned int dataoff,
+static int process_update_response(struct sk_buff *skb, unsigned int protoff,
+				   unsigned int dataoff,
 				   const char **dptr, unsigned int *datalen,
 				   unsigned int cseq, unsigned int code)
 {
@@ -1151,13 +1162,14 @@ static int process_update_response(struct sk_buff *skb, unsigned int dataoff,
 
 	if ((code >= 100 && code <= 199) ||
 	    (code >= 200 && code <= 299))
-		return process_sdp(skb, dataoff, dptr, datalen, cseq);
+		return process_sdp(skb, protoff, dataoff, dptr, datalen, cseq);
 	else if (ct_sip_info->invite_cseq == cseq)
 		flush_expectations(ct, true);
 	return NF_ACCEPT;
 }
 
-static int process_prack_response(struct sk_buff *skb, unsigned int dataoff,
+static int process_prack_response(struct sk_buff *skb, unsigned int protoff,
+				  unsigned int dataoff,
 				  const char **dptr, unsigned int *datalen,
 				  unsigned int cseq, unsigned int code)
 {
@@ -1167,13 +1179,14 @@ static int process_prack_response(struct sk_buff *skb, unsigned int dataoff,
 
 	if ((code >= 100 && code <= 199) ||
 	    (code >= 200 && code <= 299))
-		return process_sdp(skb, dataoff, dptr, datalen, cseq);
+		return process_sdp(skb, protoff, dataoff, dptr, datalen, cseq);
 	else if (ct_sip_info->invite_cseq == cseq)
 		flush_expectations(ct, true);
 	return NF_ACCEPT;
 }
 
-static int process_invite_request(struct sk_buff *skb, unsigned int dataoff,
+static int process_invite_request(struct sk_buff *skb, unsigned int protoff,
+				  unsigned int dataoff,
 				  const char **dptr, unsigned int *datalen,
 				  unsigned int cseq)
 {
@@ -1183,13 +1196,14 @@ static int process_invite_request(struct sk_buff *skb, unsigned int dataoff,
 	unsigned int ret;
 
 	flush_expectations(ct, true);
-	ret = process_sdp(skb, dataoff, dptr, datalen, cseq);
+	ret = process_sdp(skb, protoff, dataoff, dptr, datalen, cseq);
 	if (ret == NF_ACCEPT)
 		ct_sip_info->invite_cseq = cseq;
 	return ret;
 }
 
-static int process_bye_request(struct sk_buff *skb, unsigned int dataoff,
+static int process_bye_request(struct sk_buff *skb, unsigned int protoff,
+			       unsigned int dataoff,
 			       const char **dptr, unsigned int *datalen,
 			       unsigned int cseq)
 {
@@ -1204,7 +1218,8 @@ static int process_bye_request(struct sk_buff *skb, unsigned int dataoff,
  * signalling connections. The expectation is marked inactive and is activated
  * when receiving a response indicating success from the registrar.
  */
-static int process_register_request(struct sk_buff *skb, unsigned int dataoff,
+static int process_register_request(struct sk_buff *skb, unsigned int protoff,
+				    unsigned int dataoff,
 				    const char **dptr, unsigned int *datalen,
 				    unsigned int cseq)
 {
@@ -1280,8 +1295,8 @@ static int process_register_request(struct sk_buff *skb, unsigned int dataoff,
 	nf_nat_sip_expect = rcu_dereference(nf_nat_sip_expect_hook);
 	if (nf_nat_sip_expect && nf_ct_l3num(ct) == NFPROTO_IPV4 &&
 	    ct->status & IPS_NAT_MASK)
-		ret = nf_nat_sip_expect(skb, dataoff, dptr, datalen, exp,
-					matchoff, matchlen);
+		ret = nf_nat_sip_expect(skb, protoff, dataoff, dptr, datalen,
+					exp, matchoff, matchlen);
 	else {
 		if (nf_ct_expect_related(exp) != 0)
 			ret = NF_DROP;
@@ -1296,7 +1311,8 @@ store_cseq:
 	return ret;
 }
 
-static int process_register_response(struct sk_buff *skb, unsigned int dataoff,
+static int process_register_response(struct sk_buff *skb, unsigned int protoff,
+				     unsigned int dataoff,
 				     const char **dptr, unsigned int *datalen,
 				     unsigned int cseq, unsigned int code)
 {
@@ -1378,7 +1394,8 @@ static const struct sip_handler sip_handlers[] = {
 	SIP_HANDLER("REGISTER", process_register_request, process_register_response),
 };
 
-static int process_sip_response(struct sk_buff *skb, unsigned int dataoff,
+static int process_sip_response(struct sk_buff *skb, unsigned int protoff,
+				unsigned int dataoff,
 				const char **dptr, unsigned int *datalen)
 {
 	enum ip_conntrack_info ctinfo;
@@ -1409,13 +1426,14 @@ static int process_sip_response(struct sk_buff *skb, unsigned int dataoff,
 		if (*datalen < matchend + handler->len ||
 		    strnicmp(*dptr + matchend, handler->method, handler->len))
 			continue;
-		return handler->response(skb, dataoff, dptr, datalen,
+		return handler->response(skb, protoff, dataoff, dptr, datalen,
 					 cseq, code);
 	}
 	return NF_ACCEPT;
 }
 
-static int process_sip_request(struct sk_buff *skb, unsigned int dataoff,
+static int process_sip_request(struct sk_buff *skb, unsigned int protoff,
+			       unsigned int dataoff,
 			       const char **dptr, unsigned int *datalen)
 {
 	enum ip_conntrack_info ctinfo;
@@ -1440,27 +1458,29 @@ static int process_sip_request(struct sk_buff *skb, unsigned int dataoff,
 		if (!cseq)
 			return NF_DROP;
 
-		return handler->request(skb, dataoff, dptr, datalen, cseq);
+		return handler->request(skb, protoff, dataoff, dptr, datalen,
+					cseq);
 	}
 	return NF_ACCEPT;
 }
 
 static int process_sip_msg(struct sk_buff *skb, struct nf_conn *ct,
-			   unsigned int dataoff, const char **dptr,
-			   unsigned int *datalen)
+			   unsigned int protoff, unsigned int dataoff,
+			   const char **dptr, unsigned int *datalen)
 {
 	typeof(nf_nat_sip_hook) nf_nat_sip;
 	int ret;
 
 	if (strnicmp(*dptr, "SIP/2.0 ", strlen("SIP/2.0 ")) != 0)
-		ret = process_sip_request(skb, dataoff, dptr, datalen);
+		ret = process_sip_request(skb, protoff, dataoff, dptr, datalen);
 	else
-		ret = process_sip_response(skb, dataoff, dptr, datalen);
+		ret = process_sip_response(skb, protoff, dataoff, dptr, datalen);
 
 	if (ret == NF_ACCEPT && nf_ct_l3num(ct) == NFPROTO_IPV4 &&
 	    ct->status & IPS_NAT_MASK) {
 		nf_nat_sip = rcu_dereference(nf_nat_sip_hook);
-		if (nf_nat_sip && !nf_nat_sip(skb, dataoff, dptr, datalen))
+		if (nf_nat_sip && !nf_nat_sip(skb, protoff, dataoff,
+					      dptr, datalen))
 			ret = NF_DROP;
 	}
 
@@ -1528,7 +1548,8 @@ static int sip_help_tcp(struct sk_buff *skb, unsigned int protoff,
 		if (msglen > datalen)
 			return NF_DROP;
 
-		ret = process_sip_msg(skb, ct, dataoff, &dptr, &msglen);
+		ret = process_sip_msg(skb, ct, protoff, dataoff,
+				      &dptr, &msglen);
 		if (ret != NF_ACCEPT)
 			break;
 		diff     = msglen - origlen;
@@ -1570,7 +1591,7 @@ static int sip_help_udp(struct sk_buff *skb, unsigned int protoff,
 	if (datalen < strlen("SIP/2.0 200"))
 		return NF_ACCEPT;
 
-	return process_sip_msg(skb, ct, dataoff, &dptr, &datalen);
+	return process_sip_msg(skb, ct, protoff, dataoff, &dptr, &datalen);
 }
 
 static struct nf_conntrack_helper sip[MAX_PORTS][4] __read_mostly;
-- 
cgit v1.2.3


From c7232c9979cba684c50b64c513c4a83c9aa70563 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Sun, 26 Aug 2012 19:14:06 +0200
Subject: netfilter: add protocol independent NAT core

Convert the IPv4 NAT implementation to a protocol independent core and
address family specific modules.

Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/linux/netfilter.h                      |  14 +-
 include/linux/netfilter/nf_nat.h               |   8 +
 include/linux/netfilter/nfnetlink_conntrack.h  |   6 +-
 include/linux/netfilter_ipv4.h                 |   1 -
 include/net/netfilter/nf_conntrack_expect.h    |   2 +-
 include/net/netfilter/nf_nat.h                 |   2 +-
 include/net/netfilter/nf_nat_core.h            |   5 +-
 include/net/netfilter/nf_nat_l3proto.h         |  47 ++
 include/net/netfilter/nf_nat_l4proto.h         |  71 ++
 include/net/netfilter/nf_nat_protocol.h        |  67 --
 include/net/netfilter/nf_nat_rule.h            |  15 -
 include/net/netns/conntrack.h                  |   4 +
 include/net/netns/ipv4.h                       |   2 -
 net/ipv4/netfilter.c                           |  37 --
 net/ipv4/netfilter/Kconfig                     |  64 +-
 net/ipv4/netfilter/Makefile                    |  11 +-
 net/ipv4/netfilter/ipt_MASQUERADE.c            |  15 +-
 net/ipv4/netfilter/ipt_NETMAP.c                |  15 +-
 net/ipv4/netfilter/ipt_REDIRECT.c              |  15 +-
 net/ipv4/netfilter/iptable_nat.c               | 320 +++++++++
 net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c |   6 -
 net/ipv4/netfilter/nf_nat_amanda.c             |   1 -
 net/ipv4/netfilter/nf_nat_core.c               | 763 ----------------------
 net/ipv4/netfilter/nf_nat_ftp.c                |   1 -
 net/ipv4/netfilter/nf_nat_h323.c               |  23 +-
 net/ipv4/netfilter/nf_nat_helper.c             | 461 -------------
 net/ipv4/netfilter/nf_nat_irc.c                |   1 -
 net/ipv4/netfilter/nf_nat_l3proto_ipv4.c       | 281 ++++++++
 net/ipv4/netfilter/nf_nat_pptp.c               |  15 +-
 net/ipv4/netfilter/nf_nat_proto_common.c       | 114 ----
 net/ipv4/netfilter/nf_nat_proto_dccp.c         | 106 ---
 net/ipv4/netfilter/nf_nat_proto_gre.c          |  30 +-
 net/ipv4/netfilter/nf_nat_proto_icmp.c         |  24 +-
 net/ipv4/netfilter/nf_nat_proto_sctp.c         |  96 ---
 net/ipv4/netfilter/nf_nat_proto_tcp.c          |  91 ---
 net/ipv4/netfilter/nf_nat_proto_udp.c          |  82 ---
 net/ipv4/netfilter/nf_nat_proto_udplite.c      |  98 ---
 net/ipv4/netfilter/nf_nat_proto_unknown.c      |  52 --
 net/ipv4/netfilter/nf_nat_rule.c               | 214 -------
 net/ipv4/netfilter/nf_nat_sip.c                |  19 +-
 net/ipv4/netfilter/nf_nat_standalone.c         | 326 ----------
 net/ipv4/netfilter/nf_nat_tftp.c               |   1 -
 net/netfilter/Kconfig                          |  24 +
 net/netfilter/Makefile                         |  11 +
 net/netfilter/core.c                           |   5 +
 net/netfilter/nf_conntrack_core.c              |   6 +
 net/netfilter/nf_conntrack_netlink.c           |  35 +-
 net/netfilter/nf_conntrack_proto_tcp.c         |   8 +-
 net/netfilter/nf_conntrack_sip.c               |   6 +-
 net/netfilter/nf_nat_core.c                    | 854 +++++++++++++++++++++++++
 net/netfilter/nf_nat_helper.c                  | 435 +++++++++++++
 net/netfilter/nf_nat_proto_common.c            | 112 ++++
 net/netfilter/nf_nat_proto_dccp.c              | 116 ++++
 net/netfilter/nf_nat_proto_sctp.c              | 103 +++
 net/netfilter/nf_nat_proto_tcp.c               |  85 +++
 net/netfilter/nf_nat_proto_udp.c               |  76 +++
 net/netfilter/nf_nat_proto_udplite.c           | 106 +++
 net/netfilter/nf_nat_proto_unknown.c           |  54 ++
 net/netfilter/xt_nat.c                         | 167 +++++
 59 files changed, 3042 insertions(+), 2687 deletions(-)
 create mode 100644 include/net/netfilter/nf_nat_l3proto.h
 create mode 100644 include/net/netfilter/nf_nat_l4proto.h
 delete mode 100644 include/net/netfilter/nf_nat_protocol.h
 delete mode 100644 include/net/netfilter/nf_nat_rule.h
 create mode 100644 net/ipv4/netfilter/iptable_nat.c
 delete mode 100644 net/ipv4/netfilter/nf_nat_core.c
 delete mode 100644 net/ipv4/netfilter/nf_nat_helper.c
 create mode 100644 net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
 delete mode 100644 net/ipv4/netfilter/nf_nat_proto_common.c
 delete mode 100644 net/ipv4/netfilter/nf_nat_proto_dccp.c
 delete mode 100644 net/ipv4/netfilter/nf_nat_proto_sctp.c
 delete mode 100644 net/ipv4/netfilter/nf_nat_proto_tcp.c
 delete mode 100644 net/ipv4/netfilter/nf_nat_proto_udp.c
 delete mode 100644 net/ipv4/netfilter/nf_nat_proto_udplite.c
 delete mode 100644 net/ipv4/netfilter/nf_nat_proto_unknown.c
 delete mode 100644 net/ipv4/netfilter/nf_nat_rule.c
 delete mode 100644 net/ipv4/netfilter/nf_nat_standalone.c
 create mode 100644 net/netfilter/nf_nat_core.c
 create mode 100644 net/netfilter/nf_nat_helper.c
 create mode 100644 net/netfilter/nf_nat_proto_common.c
 create mode 100644 net/netfilter/nf_nat_proto_dccp.c
 create mode 100644 net/netfilter/nf_nat_proto_sctp.c
 create mode 100644 net/netfilter/nf_nat_proto_tcp.c
 create mode 100644 net/netfilter/nf_nat_proto_udp.c
 create mode 100644 net/netfilter/nf_nat_proto_udplite.c
 create mode 100644 net/netfilter/nf_nat_proto_unknown.c
 create mode 100644 net/netfilter/xt_nat.c

(limited to 'include')

diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
index c613cf0d7884..1dcf2a38e51f 100644
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -342,7 +342,7 @@ extern int nf_register_afinfo(const struct nf_afinfo *afinfo);
 extern void nf_unregister_afinfo(const struct nf_afinfo *afinfo);
 
 #include <net/flow.h>
-extern void (*ip_nat_decode_session)(struct sk_buff *, struct flowi *);
+extern void (*nf_nat_decode_session_hook)(struct sk_buff *, struct flowi *);
 
 static inline void
 nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl, u_int8_t family)
@@ -350,13 +350,11 @@ nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl, u_int8_t family)
 #ifdef CONFIG_NF_NAT_NEEDED
 	void (*decodefn)(struct sk_buff *, struct flowi *);
 
-	if (family == AF_INET) {
-		rcu_read_lock();
-		decodefn = rcu_dereference(ip_nat_decode_session);
-		if (decodefn)
-			decodefn(skb, fl);
-		rcu_read_unlock();
-	}
+	rcu_read_lock();
+	decodefn = rcu_dereference(nf_nat_decode_session_hook);
+	if (decodefn)
+		decodefn(skb, fl);
+	rcu_read_unlock();
 #endif
 }
 
diff --git a/include/linux/netfilter/nf_nat.h b/include/linux/netfilter/nf_nat.h
index 8df2d13730b2..bf0cc373ffb6 100644
--- a/include/linux/netfilter/nf_nat.h
+++ b/include/linux/netfilter/nf_nat.h
@@ -22,4 +22,12 @@ struct nf_nat_ipv4_multi_range_compat {
 	struct nf_nat_ipv4_range	range[1];
 };
 
+struct nf_nat_range {
+	unsigned int			flags;
+	union nf_inet_addr		min_addr;
+	union nf_inet_addr		max_addr;
+	union nf_conntrack_man_proto	min_proto;
+	union nf_conntrack_man_proto	max_proto;
+};
+
 #endif /* _NETFILTER_NF_NAT_H */
diff --git a/include/linux/netfilter/nfnetlink_conntrack.h b/include/linux/netfilter/nfnetlink_conntrack.h
index f649f7423ca2..68920eab287c 100644
--- a/include/linux/netfilter/nfnetlink_conntrack.h
+++ b/include/linux/netfilter/nfnetlink_conntrack.h
@@ -142,8 +142,10 @@ enum ctattr_tstamp {
 
 enum ctattr_nat {
 	CTA_NAT_UNSPEC,
-	CTA_NAT_MINIP,
-	CTA_NAT_MAXIP,
+	CTA_NAT_V4_MINIP,
+#define CTA_NAT_MINIP CTA_NAT_V4_MINIP
+	CTA_NAT_V4_MAXIP,
+#define CTA_NAT_MAXIP CTA_NAT_V4_MAXIP
 	CTA_NAT_PROTO,
 	__CTA_NAT_MAX
 };
diff --git a/include/linux/netfilter_ipv4.h b/include/linux/netfilter_ipv4.h
index e2b12801378d..b962dfc695ae 100644
--- a/include/linux/netfilter_ipv4.h
+++ b/include/linux/netfilter_ipv4.h
@@ -79,7 +79,6 @@ enum nf_ip_hook_priorities {
 
 #ifdef __KERNEL__
 extern int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type);
-extern int ip_xfrm_me_harder(struct sk_buff *skb);
 extern __sum16 nf_ip_checksum(struct sk_buff *skb, unsigned int hook,
 				   unsigned int dataoff, u_int8_t protocol);
 #endif /*__KERNEL__*/
diff --git a/include/net/netfilter/nf_conntrack_expect.h b/include/net/netfilter/nf_conntrack_expect.h
index 983f00263243..cc13f377a705 100644
--- a/include/net/netfilter/nf_conntrack_expect.h
+++ b/include/net/netfilter/nf_conntrack_expect.h
@@ -43,7 +43,7 @@ struct nf_conntrack_expect {
 	unsigned int class;
 
 #ifdef CONFIG_NF_NAT_NEEDED
-	__be32 saved_ip;
+	union nf_inet_addr saved_addr;
 	/* This is the original per-proto part, used to map the
 	 * expected connection the way the recipient expects. */
 	union nf_conntrack_man_proto saved_proto;
diff --git a/include/net/netfilter/nf_nat.h b/include/net/netfilter/nf_nat.h
index b4de990b55f1..1752f1339054 100644
--- a/include/net/netfilter/nf_nat.h
+++ b/include/net/netfilter/nf_nat.h
@@ -50,7 +50,7 @@ struct nf_conn_nat {
 
 /* Set up the info structure to map into this range. */
 extern unsigned int nf_nat_setup_info(struct nf_conn *ct,
-				      const struct nf_nat_ipv4_range *range,
+				      const struct nf_nat_range *range,
 				      enum nf_nat_manip_type maniptype);
 
 /* Is this tuple already taken? (not by us)*/
diff --git a/include/net/netfilter/nf_nat_core.h b/include/net/netfilter/nf_nat_core.h
index b13d8d18d595..972e1e47ec79 100644
--- a/include/net/netfilter/nf_nat_core.h
+++ b/include/net/netfilter/nf_nat_core.h
@@ -12,10 +12,7 @@ extern unsigned int nf_nat_packet(struct nf_conn *ct,
 				  unsigned int hooknum,
 				  struct sk_buff *skb);
 
-extern int nf_nat_icmp_reply_translation(struct nf_conn *ct,
-					 enum ip_conntrack_info ctinfo,
-					 unsigned int hooknum,
-					 struct sk_buff *skb);
+extern int nf_xfrm_me_harder(struct sk_buff *skb, unsigned int family);
 
 static inline int nf_nat_initialized(struct nf_conn *ct,
 				     enum nf_nat_manip_type manip)
diff --git a/include/net/netfilter/nf_nat_l3proto.h b/include/net/netfilter/nf_nat_l3proto.h
new file mode 100644
index 000000000000..beed96961fa7
--- /dev/null
+++ b/include/net/netfilter/nf_nat_l3proto.h
@@ -0,0 +1,47 @@
+#ifndef _NF_NAT_L3PROTO_H
+#define _NF_NAT_L3PROTO_H
+
+struct nf_nat_l4proto;
+struct nf_nat_l3proto {
+	u8	l3proto;
+
+	bool	(*in_range)(const struct nf_conntrack_tuple *t,
+			    const struct nf_nat_range *range);
+
+	u32 	(*secure_port)(const struct nf_conntrack_tuple *t, __be16);
+
+	bool	(*manip_pkt)(struct sk_buff *skb,
+			     unsigned int iphdroff,
+			     const struct nf_nat_l4proto *l4proto,
+			     const struct nf_conntrack_tuple *target,
+			     enum nf_nat_manip_type maniptype);
+
+	void	(*csum_update)(struct sk_buff *skb, unsigned int iphdroff,
+			       __sum16 *check,
+			       const struct nf_conntrack_tuple *t,
+			       enum nf_nat_manip_type maniptype);
+
+	void	(*csum_recalc)(struct sk_buff *skb, u8 proto,
+			       void *data, __sum16 *check,
+			       int datalen, int oldlen);
+
+	void	(*decode_session)(struct sk_buff *skb,
+				  const struct nf_conn *ct,
+				  enum ip_conntrack_dir dir,
+				  unsigned long statusbit,
+				  struct flowi *fl);
+
+	int	(*nlattr_to_range)(struct nlattr *tb[],
+				   struct nf_nat_range *range);
+};
+
+extern int nf_nat_l3proto_register(const struct nf_nat_l3proto *);
+extern void nf_nat_l3proto_unregister(const struct nf_nat_l3proto *);
+extern const struct nf_nat_l3proto *__nf_nat_l3proto_find(u8 l3proto);
+
+extern int nf_nat_icmp_reply_translation(struct sk_buff *skb,
+					 struct nf_conn *ct,
+					 enum ip_conntrack_info ctinfo,
+					 unsigned int hooknum);
+
+#endif /* _NF_NAT_L3PROTO_H */
diff --git a/include/net/netfilter/nf_nat_l4proto.h b/include/net/netfilter/nf_nat_l4proto.h
new file mode 100644
index 000000000000..1f0a4f018fcf
--- /dev/null
+++ b/include/net/netfilter/nf_nat_l4proto.h
@@ -0,0 +1,71 @@
+/* Header for use in defining a given protocol. */
+#ifndef _NF_NAT_L4PROTO_H
+#define _NF_NAT_L4PROTO_H
+#include <net/netfilter/nf_nat.h>
+#include <linux/netfilter/nfnetlink_conntrack.h>
+
+struct nf_nat_range;
+struct nf_nat_l3proto;
+
+struct nf_nat_l4proto {
+	/* Protocol number. */
+	u8 l4proto;
+
+	/* Translate a packet to the target according to manip type.
+	 * Return true if succeeded.
+	 */
+	bool (*manip_pkt)(struct sk_buff *skb,
+			  const struct nf_nat_l3proto *l3proto,
+			  unsigned int iphdroff, unsigned int hdroff,
+			  const struct nf_conntrack_tuple *tuple,
+			  enum nf_nat_manip_type maniptype);
+
+	/* Is the manipable part of the tuple between min and max incl? */
+	bool (*in_range)(const struct nf_conntrack_tuple *tuple,
+			 enum nf_nat_manip_type maniptype,
+			 const union nf_conntrack_man_proto *min,
+			 const union nf_conntrack_man_proto *max);
+
+	/* Alter the per-proto part of the tuple (depending on
+	 * maniptype), to give a unique tuple in the given range if
+	 * possible.  Per-protocol part of tuple is initialized to the
+	 * incoming packet.
+	 */
+	void (*unique_tuple)(const struct nf_nat_l3proto *l3proto,
+			     struct nf_conntrack_tuple *tuple,
+			     const struct nf_nat_range *range,
+			     enum nf_nat_manip_type maniptype,
+			     const struct nf_conn *ct);
+
+	int (*nlattr_to_range)(struct nlattr *tb[],
+			       struct nf_nat_range *range);
+};
+
+/* Protocol registration. */
+extern int nf_nat_l4proto_register(u8 l3proto, const struct nf_nat_l4proto *l4proto);
+extern void nf_nat_l4proto_unregister(u8 l3proto, const struct nf_nat_l4proto *l4proto);
+
+extern const struct nf_nat_l4proto *__nf_nat_l4proto_find(u8 l3proto, u8 l4proto);
+
+/* Built-in protocols. */
+extern const struct nf_nat_l4proto nf_nat_l4proto_tcp;
+extern const struct nf_nat_l4proto nf_nat_l4proto_udp;
+extern const struct nf_nat_l4proto nf_nat_l4proto_icmp;
+extern const struct nf_nat_l4proto nf_nat_l4proto_unknown;
+
+extern bool nf_nat_l4proto_in_range(const struct nf_conntrack_tuple *tuple,
+				    enum nf_nat_manip_type maniptype,
+				    const union nf_conntrack_man_proto *min,
+				    const union nf_conntrack_man_proto *max);
+
+extern void nf_nat_l4proto_unique_tuple(const struct nf_nat_l3proto *l3proto,
+					struct nf_conntrack_tuple *tuple,
+					const struct nf_nat_range *range,
+					enum nf_nat_manip_type maniptype,
+					const struct nf_conn *ct,
+					u16 *rover);
+
+extern int nf_nat_l4proto_nlattr_to_range(struct nlattr *tb[],
+					  struct nf_nat_range *range);
+
+#endif /*_NF_NAT_L4PROTO_H*/
diff --git a/include/net/netfilter/nf_nat_protocol.h b/include/net/netfilter/nf_nat_protocol.h
deleted file mode 100644
index 7b0b51165f70..000000000000
--- a/include/net/netfilter/nf_nat_protocol.h
+++ /dev/null
@@ -1,67 +0,0 @@
-/* Header for use in defining a given protocol. */
-#ifndef _NF_NAT_PROTOCOL_H
-#define _NF_NAT_PROTOCOL_H
-#include <net/netfilter/nf_nat.h>
-#include <linux/netfilter/nfnetlink_conntrack.h>
-
-struct nf_nat_ipv4_range;
-
-struct nf_nat_protocol {
-	/* Protocol number. */
-	unsigned int protonum;
-
-	/* Translate a packet to the target according to manip type.
-	   Return true if succeeded. */
-	bool (*manip_pkt)(struct sk_buff *skb,
-			  unsigned int iphdroff,
-			  const struct nf_conntrack_tuple *tuple,
-			  enum nf_nat_manip_type maniptype);
-
-	/* Is the manipable part of the tuple between min and max incl? */
-	bool (*in_range)(const struct nf_conntrack_tuple *tuple,
-			 enum nf_nat_manip_type maniptype,
-			 const union nf_conntrack_man_proto *min,
-			 const union nf_conntrack_man_proto *max);
-
-	/* Alter the per-proto part of the tuple (depending on
-	   maniptype), to give a unique tuple in the given range if
-	   possible.  Per-protocol part of tuple is initialized to the
-	   incoming packet. */
-	void (*unique_tuple)(struct nf_conntrack_tuple *tuple,
-			     const struct nf_nat_ipv4_range *range,
-			     enum nf_nat_manip_type maniptype,
-			     const struct nf_conn *ct);
-
-	int (*nlattr_to_range)(struct nlattr *tb[],
-			       struct nf_nat_ipv4_range *range);
-};
-
-/* Protocol registration. */
-extern int nf_nat_protocol_register(const struct nf_nat_protocol *proto);
-extern void nf_nat_protocol_unregister(const struct nf_nat_protocol *proto);
-
-/* Built-in protocols. */
-extern const struct nf_nat_protocol nf_nat_protocol_tcp;
-extern const struct nf_nat_protocol nf_nat_protocol_udp;
-extern const struct nf_nat_protocol nf_nat_protocol_icmp;
-extern const struct nf_nat_protocol nf_nat_unknown_protocol;
-
-extern int init_protocols(void) __init;
-extern void cleanup_protocols(void);
-extern const struct nf_nat_protocol *find_nat_proto(u_int16_t protonum);
-
-extern bool nf_nat_proto_in_range(const struct nf_conntrack_tuple *tuple,
-				  enum nf_nat_manip_type maniptype,
-				  const union nf_conntrack_man_proto *min,
-				  const union nf_conntrack_man_proto *max);
-
-extern void nf_nat_proto_unique_tuple(struct nf_conntrack_tuple *tuple,
-				      const struct nf_nat_ipv4_range *range,
-				      enum nf_nat_manip_type maniptype,
-				      const struct nf_conn *ct,
-				      u_int16_t *rover);
-
-extern int nf_nat_proto_nlattr_to_range(struct nlattr *tb[],
-					struct nf_nat_ipv4_range *range);
-
-#endif /*_NF_NAT_PROTO_H*/
diff --git a/include/net/netfilter/nf_nat_rule.h b/include/net/netfilter/nf_nat_rule.h
deleted file mode 100644
index 2890bdc4cd92..000000000000
--- a/include/net/netfilter/nf_nat_rule.h
+++ /dev/null
@@ -1,15 +0,0 @@
-#ifndef _NF_NAT_RULE_H
-#define _NF_NAT_RULE_H
-#include <net/netfilter/nf_conntrack.h>
-#include <net/netfilter/nf_nat.h>
-#include <linux/netfilter_ipv4/ip_tables.h>
-
-extern int nf_nat_rule_init(void) __init;
-extern void nf_nat_rule_cleanup(void);
-extern int nf_nat_rule_find(struct sk_buff *skb,
-			    unsigned int hooknum,
-			    const struct net_device *in,
-			    const struct net_device *out,
-			    struct nf_conn *ct);
-
-#endif /* _NF_NAT_RULE_H */
diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h
index 3aecdc7a84fb..a1d83cc8bf85 100644
--- a/include/net/netns/conntrack.h
+++ b/include/net/netns/conntrack.h
@@ -83,6 +83,10 @@ struct netns_ct {
 	int			sysctl_auto_assign_helper;
 	bool			auto_assign_helper_warned;
 	struct nf_ip_net	nf_ct_proto;
+#ifdef CONFIG_NF_NAT_NEEDED
+	struct hlist_head	*nat_bysource;
+	unsigned int		nat_htable_size;
+#endif
 #ifdef CONFIG_SYSCTL
 	struct ctl_table_header	*sysctl_header;
 	struct ctl_table_header	*acct_sysctl_header;
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 1474dd65c66f..ace280d19a20 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -51,8 +51,6 @@ struct netns_ipv4 {
 	struct xt_table		*iptable_security;
 #endif
 	struct xt_table		*nat_table;
-	struct hlist_head	*nat_bysource;
-	unsigned int		nat_htable_size;
 #endif
 
 	int sysctl_icmp_echo_ignore_all;
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c
index ed1b36783192..f1643c0c3587 100644
--- a/net/ipv4/netfilter.c
+++ b/net/ipv4/netfilter.c
@@ -72,43 +72,6 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned int addr_type)
 }
 EXPORT_SYMBOL(ip_route_me_harder);
 
-#ifdef CONFIG_XFRM
-int ip_xfrm_me_harder(struct sk_buff *skb)
-{
-	struct flowi fl;
-	unsigned int hh_len;
-	struct dst_entry *dst;
-
-	if (IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED)
-		return 0;
-	if (xfrm_decode_session(skb, &fl, AF_INET) < 0)
-		return -1;
-
-	dst = skb_dst(skb);
-	if (dst->xfrm)
-		dst = ((struct xfrm_dst *)dst)->route;
-	dst_hold(dst);
-
-	dst = xfrm_lookup(dev_net(dst->dev), dst, &fl, skb->sk, 0);
-	if (IS_ERR(dst))
-		return -1;
-
-	skb_dst_drop(skb);
-	skb_dst_set(skb, dst);
-
-	/* Change in oif may mean change in hh_len. */
-	hh_len = skb_dst(skb)->dev->hard_header_len;
-	if (skb_headroom(skb) < hh_len &&
-	    pskb_expand_head(skb, hh_len - skb_headroom(skb), 0, GFP_ATOMIC))
-		return -1;
-	return 0;
-}
-EXPORT_SYMBOL(ip_xfrm_me_harder);
-#endif
-
-void (*ip_nat_decode_session)(struct sk_buff *, struct flowi *);
-EXPORT_SYMBOL(ip_nat_decode_session);
-
 /*
  * Extra routing may needed on local out, as the QUEUE target never
  * returns control to the table.
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index fcc543cd987a..b26629681bdb 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -143,25 +143,22 @@ config IP_NF_TARGET_ULOG
 	  To compile it as a module, choose M here.  If unsure, say N.
 
 # NAT + specific targets: nf_conntrack
-config NF_NAT
-	tristate "Full NAT"
+config NF_NAT_IPV4
+	tristate "IPv4 NAT"
 	depends on NF_CONNTRACK_IPV4
 	default m if NETFILTER_ADVANCED=n
+	select NF_NAT
 	help
-	  The Full NAT option allows masquerading, port forwarding and other
+	  The IPv4 NAT option allows masquerading, port forwarding and other
 	  forms of full Network Address Port Translation.  It is controlled by
 	  the `nat' table in iptables: see the man page for iptables(8).
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
-config NF_NAT_NEEDED
-	bool
-	depends on NF_NAT
-	default y
+if NF_NAT_IPV4
 
 config IP_NF_TARGET_MASQUERADE
 	tristate "MASQUERADE target support"
-	depends on NF_NAT
 	default m if NETFILTER_ADVANCED=n
 	help
 	  Masquerading is a special case of NAT: all outgoing connections are
@@ -174,7 +171,6 @@ config IP_NF_TARGET_MASQUERADE
 
 config IP_NF_TARGET_NETMAP
 	tristate "NETMAP target support"
-	depends on NF_NAT
 	depends on NETFILTER_ADVANCED
 	help
 	  NETMAP is an implementation of static 1:1 NAT mapping of network
@@ -185,7 +181,6 @@ config IP_NF_TARGET_NETMAP
 
 config IP_NF_TARGET_REDIRECT
 	tristate "REDIRECT target support"
-	depends on NF_NAT
 	depends on NETFILTER_ADVANCED
 	help
 	  REDIRECT is a special case of NAT: all incoming connections are
@@ -195,9 +190,11 @@ config IP_NF_TARGET_REDIRECT
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
+endif
+
 config NF_NAT_SNMP_BASIC
 	tristate "Basic SNMP-ALG support"
-	depends on NF_CONNTRACK_SNMP && NF_NAT
+	depends on NF_CONNTRACK_SNMP && NF_NAT_IPV4
 	depends on NETFILTER_ADVANCED
 	default NF_NAT && NF_CONNTRACK_SNMP
 	---help---
@@ -219,61 +216,46 @@ config NF_NAT_SNMP_BASIC
 #           <expr> '&&' <expr>                   (6)
 #
 # (6) Returns the result of min(/expr/, /expr/).
-config NF_NAT_PROTO_DCCP
-	tristate
-	depends on NF_NAT && NF_CT_PROTO_DCCP
-	default NF_NAT && NF_CT_PROTO_DCCP
 
 config NF_NAT_PROTO_GRE
 	tristate
-	depends on NF_NAT && NF_CT_PROTO_GRE
-
-config NF_NAT_PROTO_UDPLITE
-	tristate
-	depends on NF_NAT && NF_CT_PROTO_UDPLITE
-	default NF_NAT && NF_CT_PROTO_UDPLITE
-
-config NF_NAT_PROTO_SCTP
-	tristate
-	default NF_NAT && NF_CT_PROTO_SCTP
-	depends on NF_NAT && NF_CT_PROTO_SCTP
-	select LIBCRC32C
+	depends on NF_NAT_IPV4 && NF_CT_PROTO_GRE
 
 config NF_NAT_FTP
 	tristate
-	depends on NF_CONNTRACK && NF_NAT
-	default NF_NAT && NF_CONNTRACK_FTP
+	depends on NF_CONNTRACK && NF_NAT_IPV4
+	default NF_NAT_IPV4 && NF_CONNTRACK_FTP
 
 config NF_NAT_IRC
 	tristate
-	depends on NF_CONNTRACK && NF_NAT
-	default NF_NAT && NF_CONNTRACK_IRC
+	depends on NF_CONNTRACK && NF_NAT_IPV4
+	default NF_NAT_IPV4 && NF_CONNTRACK_IRC
 
 config NF_NAT_TFTP
 	tristate
-	depends on NF_CONNTRACK && NF_NAT
-	default NF_NAT && NF_CONNTRACK_TFTP
+	depends on NF_CONNTRACK && NF_NAT_IPV4
+	default NF_NAT_IPV4 && NF_CONNTRACK_TFTP
 
 config NF_NAT_AMANDA
 	tristate
-	depends on NF_CONNTRACK && NF_NAT
-	default NF_NAT && NF_CONNTRACK_AMANDA
+	depends on NF_CONNTRACK && NF_NAT_IPV4
+	default NF_NAT_IPV4 && NF_CONNTRACK_AMANDA
 
 config NF_NAT_PPTP
 	tristate
-	depends on NF_CONNTRACK && NF_NAT
-	default NF_NAT && NF_CONNTRACK_PPTP
+	depends on NF_CONNTRACK && NF_NAT_IPV4
+	default NF_NAT_IPV4 && NF_CONNTRACK_PPTP
 	select NF_NAT_PROTO_GRE
 
 config NF_NAT_H323
 	tristate
-	depends on NF_CONNTRACK && NF_NAT
-	default NF_NAT && NF_CONNTRACK_H323
+	depends on NF_CONNTRACK && NF_NAT_IPV4
+	default NF_NAT_IPV4 && NF_CONNTRACK_H323
 
 config NF_NAT_SIP
 	tristate
-	depends on NF_CONNTRACK && NF_NAT
-	default NF_NAT && NF_CONNTRACK_SIP
+	depends on NF_CONNTRACK && NF_NAT_IPV4
+	default NF_NAT_IPV4 && NF_CONNTRACK_SIP
 
 # mangle + specific targets
 config IP_NF_MANGLE
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index c20674dc9452..0ea3acc510e2 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -10,13 +10,11 @@ nf_conntrack_ipv4-objs	+= nf_conntrack_l3proto_ipv4_compat.o
 endif
 endif
 
-nf_nat-y		:= nf_nat_core.o nf_nat_helper.o nf_nat_proto_unknown.o nf_nat_proto_common.o nf_nat_proto_tcp.o nf_nat_proto_udp.o nf_nat_proto_icmp.o
-iptable_nat-y	:= nf_nat_rule.o nf_nat_standalone.o
-
 # connection tracking
 obj-$(CONFIG_NF_CONNTRACK_IPV4) += nf_conntrack_ipv4.o
 
-obj-$(CONFIG_NF_NAT) += nf_nat.o
+nf_nat_ipv4-y		:= nf_nat_l3proto_ipv4.o nf_nat_proto_icmp.o
+obj-$(CONFIG_NF_NAT_IPV4) += nf_nat_ipv4.o
 
 # defrag
 obj-$(CONFIG_NF_DEFRAG_IPV4) += nf_defrag_ipv4.o
@@ -32,10 +30,7 @@ obj-$(CONFIG_NF_NAT_SNMP_BASIC) += nf_nat_snmp_basic.o
 obj-$(CONFIG_NF_NAT_TFTP) += nf_nat_tftp.o
 
 # NAT protocols (nf_nat)
-obj-$(CONFIG_NF_NAT_PROTO_DCCP) += nf_nat_proto_dccp.o
 obj-$(CONFIG_NF_NAT_PROTO_GRE) += nf_nat_proto_gre.o
-obj-$(CONFIG_NF_NAT_PROTO_UDPLITE) += nf_nat_proto_udplite.o
-obj-$(CONFIG_NF_NAT_PROTO_SCTP) += nf_nat_proto_sctp.o
 
 # generic IP tables 
 obj-$(CONFIG_IP_NF_IPTABLES) += ip_tables.o
@@ -43,7 +38,7 @@ obj-$(CONFIG_IP_NF_IPTABLES) += ip_tables.o
 # the three instances of ip_tables
 obj-$(CONFIG_IP_NF_FILTER) += iptable_filter.o
 obj-$(CONFIG_IP_NF_MANGLE) += iptable_mangle.o
-obj-$(CONFIG_NF_NAT) += iptable_nat.o
+obj-$(CONFIG_NF_NAT_IPV4) += iptable_nat.o
 obj-$(CONFIG_IP_NF_RAW) += iptable_raw.o
 obj-$(CONFIG_IP_NF_SECURITY) += iptable_security.o
 
diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c
index cbb6a1a6f6f7..1c3aa28b51ae 100644
--- a/net/ipv4/netfilter/ipt_MASQUERADE.c
+++ b/net/ipv4/netfilter/ipt_MASQUERADE.c
@@ -19,9 +19,9 @@
 #include <net/ip.h>
 #include <net/checksum.h>
 #include <net/route.h>
-#include <net/netfilter/nf_nat_rule.h>
 #include <linux/netfilter_ipv4.h>
 #include <linux/netfilter/x_tables.h>
+#include <net/netfilter/nf_nat.h>
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
@@ -49,7 +49,7 @@ masquerade_tg(struct sk_buff *skb, const struct xt_action_param *par)
 	struct nf_conn *ct;
 	struct nf_conn_nat *nat;
 	enum ip_conntrack_info ctinfo;
-	struct nf_nat_ipv4_range newrange;
+	struct nf_nat_range newrange;
 	const struct nf_nat_ipv4_multi_range_compat *mr;
 	const struct rtable *rt;
 	__be32 newsrc, nh;
@@ -80,10 +80,13 @@ masquerade_tg(struct sk_buff *skb, const struct xt_action_param *par)
 	nat->masq_index = par->out->ifindex;
 
 	/* Transfer from original range. */
-	newrange = ((struct nf_nat_ipv4_range)
-		{ mr->range[0].flags | NF_NAT_RANGE_MAP_IPS,
-		  newsrc, newsrc,
-		  mr->range[0].min, mr->range[0].max });
+	memset(&newrange.min_addr, 0, sizeof(newrange.min_addr));
+	memset(&newrange.max_addr, 0, sizeof(newrange.max_addr));
+	newrange.flags       = mr->range[0].flags | NF_NAT_RANGE_MAP_IPS;
+	newrange.min_addr.ip = newsrc;
+	newrange.max_addr.ip = newsrc;
+	newrange.min_proto   = mr->range[0].min;
+	newrange.max_proto   = mr->range[0].max;
 
 	/* Hand modified range to generic setup. */
 	return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_SRC);
diff --git a/net/ipv4/netfilter/ipt_NETMAP.c b/net/ipv4/netfilter/ipt_NETMAP.c
index b5bfbbabf70d..85028dc0425d 100644
--- a/net/ipv4/netfilter/ipt_NETMAP.c
+++ b/net/ipv4/netfilter/ipt_NETMAP.c
@@ -16,7 +16,7 @@
 #include <linux/netfilter.h>
 #include <linux/netfilter_ipv4.h>
 #include <linux/netfilter/x_tables.h>
-#include <net/netfilter/nf_nat_rule.h>
+#include <net/netfilter/nf_nat.h>
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Svenning Soerensen <svenning@post5.tele.dk>");
@@ -44,7 +44,7 @@ netmap_tg(struct sk_buff *skb, const struct xt_action_param *par)
 	enum ip_conntrack_info ctinfo;
 	__be32 new_ip, netmask;
 	const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo;
-	struct nf_nat_ipv4_range newrange;
+	struct nf_nat_range newrange;
 
 	NF_CT_ASSERT(par->hooknum == NF_INET_PRE_ROUTING ||
 		     par->hooknum == NF_INET_POST_ROUTING ||
@@ -61,10 +61,13 @@ netmap_tg(struct sk_buff *skb, const struct xt_action_param *par)
 		new_ip = ip_hdr(skb)->saddr & ~netmask;
 	new_ip |= mr->range[0].min_ip & netmask;
 
-	newrange = ((struct nf_nat_ipv4_range)
-		{ mr->range[0].flags | NF_NAT_RANGE_MAP_IPS,
-		  new_ip, new_ip,
-		  mr->range[0].min, mr->range[0].max });
+	memset(&newrange.min_addr, 0, sizeof(newrange.min_addr));
+	memset(&newrange.max_addr, 0, sizeof(newrange.max_addr));
+	newrange.flags	     = mr->range[0].flags | NF_NAT_RANGE_MAP_IPS;
+	newrange.min_addr.ip = new_ip;
+	newrange.max_addr.ip = new_ip;
+	newrange.min_proto   = mr->range[0].min;
+	newrange.max_proto   = mr->range[0].max;
 
 	/* Hand modified range to generic setup. */
 	return nf_nat_setup_info(ct, &newrange, HOOK2MANIP(par->hooknum));
diff --git a/net/ipv4/netfilter/ipt_REDIRECT.c b/net/ipv4/netfilter/ipt_REDIRECT.c
index 7c0103a5203e..11407d7d2472 100644
--- a/net/ipv4/netfilter/ipt_REDIRECT.c
+++ b/net/ipv4/netfilter/ipt_REDIRECT.c
@@ -19,7 +19,7 @@
 #include <net/checksum.h>
 #include <linux/netfilter_ipv4.h>
 #include <linux/netfilter/x_tables.h>
-#include <net/netfilter/nf_nat_rule.h>
+#include <net/netfilter/nf_nat.h>
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
@@ -48,7 +48,7 @@ redirect_tg(struct sk_buff *skb, const struct xt_action_param *par)
 	enum ip_conntrack_info ctinfo;
 	__be32 newdst;
 	const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo;
-	struct nf_nat_ipv4_range newrange;
+	struct nf_nat_range newrange;
 
 	NF_CT_ASSERT(par->hooknum == NF_INET_PRE_ROUTING ||
 		     par->hooknum == NF_INET_LOCAL_OUT);
@@ -76,10 +76,13 @@ redirect_tg(struct sk_buff *skb, const struct xt_action_param *par)
 	}
 
 	/* Transfer from original range. */
-	newrange = ((struct nf_nat_ipv4_range)
-		{ mr->range[0].flags | NF_NAT_RANGE_MAP_IPS,
-		  newdst, newdst,
-		  mr->range[0].min, mr->range[0].max });
+	memset(&newrange.min_addr, 0, sizeof(newrange.min_addr));
+	memset(&newrange.max_addr, 0, sizeof(newrange.max_addr));
+	newrange.flags	     = mr->range[0].flags | NF_NAT_RANGE_MAP_IPS;
+	newrange.min_addr.ip = newdst;
+	newrange.max_addr.ip = newdst;
+	newrange.min_proto   = mr->range[0].min;
+	newrange.max_proto   = mr->range[0].max;
 
 	/* Hand modified range to generic setup. */
 	return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_DST);
diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c
new file mode 100644
index 000000000000..9e0ffaf1d942
--- /dev/null
+++ b/net/ipv4/netfilter/iptable_nat.c
@@ -0,0 +1,320 @@
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
+ * (C) 2011 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/ip.h>
+#include <net/ip.h>
+
+#include <net/netfilter/nf_nat.h>
+#include <net/netfilter/nf_nat_core.h>
+#include <net/netfilter/nf_nat_l3proto.h>
+
+static const struct xt_table nf_nat_ipv4_table = {
+	.name		= "nat",
+	.valid_hooks	= (1 << NF_INET_PRE_ROUTING) |
+			  (1 << NF_INET_POST_ROUTING) |
+			  (1 << NF_INET_LOCAL_OUT) |
+			  (1 << NF_INET_LOCAL_IN),
+	.me		= THIS_MODULE,
+	.af		= NFPROTO_IPV4,
+};
+
+static unsigned int alloc_null_binding(struct nf_conn *ct, unsigned int hooknum)
+{
+	/* Force range to this IP; let proto decide mapping for
+	 * per-proto parts (hence not IP_NAT_RANGE_PROTO_SPECIFIED).
+	 */
+	struct nf_nat_range range;
+
+	range.flags = 0;
+	pr_debug("Allocating NULL binding for %p (%pI4)\n", ct,
+		 HOOK2MANIP(hooknum) == NF_NAT_MANIP_SRC ?
+		 &ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip :
+		 &ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip);
+
+	return nf_nat_setup_info(ct, &range, HOOK2MANIP(hooknum));
+}
+
+static unsigned int nf_nat_rule_find(struct sk_buff *skb, unsigned int hooknum,
+				     const struct net_device *in,
+				     const struct net_device *out,
+				     struct nf_conn *ct)
+{
+	struct net *net = nf_ct_net(ct);
+	unsigned int ret;
+
+	ret = ipt_do_table(skb, hooknum, in, out, net->ipv4.nat_table);
+	if (ret == NF_ACCEPT) {
+		if (!nf_nat_initialized(ct, HOOK2MANIP(hooknum)))
+			ret = alloc_null_binding(ct, hooknum);
+	}
+	return ret;
+}
+
+static unsigned int
+nf_nat_ipv4_fn(unsigned int hooknum,
+	       struct sk_buff *skb,
+	       const struct net_device *in,
+	       const struct net_device *out,
+	       int (*okfn)(struct sk_buff *))
+{
+	struct nf_conn *ct;
+	enum ip_conntrack_info ctinfo;
+	struct nf_conn_nat *nat;
+	/* maniptype == SRC for postrouting. */
+	enum nf_nat_manip_type maniptype = HOOK2MANIP(hooknum);
+
+	/* We never see fragments: conntrack defrags on pre-routing
+	 * and local-out, and nf_nat_out protects post-routing.
+	 */
+	NF_CT_ASSERT(!ip_is_fragment(ip_hdr(skb)));
+
+	ct = nf_ct_get(skb, &ctinfo);
+	/* Can't track?  It's not due to stress, or conntrack would
+	 * have dropped it.  Hence it's the user's responsibilty to
+	 * packet filter it out, or implement conntrack/NAT for that
+	 * protocol. 8) --RR
+	 */
+	if (!ct)
+		return NF_ACCEPT;
+
+	/* Don't try to NAT if this packet is not conntracked */
+	if (nf_ct_is_untracked(ct))
+		return NF_ACCEPT;
+
+	nat = nfct_nat(ct);
+	if (!nat) {
+		/* NAT module was loaded late. */
+		if (nf_ct_is_confirmed(ct))
+			return NF_ACCEPT;
+		nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC);
+		if (nat == NULL) {
+			pr_debug("failed to add NAT extension\n");
+			return NF_ACCEPT;
+		}
+	}
+
+	switch (ctinfo) {
+	case IP_CT_RELATED:
+	case IP_CT_RELATED_REPLY:
+		if (ip_hdr(skb)->protocol == IPPROTO_ICMP) {
+			if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo,
+							   hooknum))
+				return NF_DROP;
+			else
+				return NF_ACCEPT;
+		}
+		/* Fall thru... (Only ICMPs can be IP_CT_IS_REPLY) */
+	case IP_CT_NEW:
+		/* Seen it before?  This can happen for loopback, retrans,
+		 * or local packets.
+		 */
+		if (!nf_nat_initialized(ct, maniptype)) {
+			unsigned int ret;
+
+			ret = nf_nat_rule_find(skb, hooknum, in, out, ct);
+			if (ret != NF_ACCEPT)
+				return ret;
+		} else
+			pr_debug("Already setup manip %s for ct %p\n",
+				 maniptype == NF_NAT_MANIP_SRC ? "SRC" : "DST",
+				 ct);
+		break;
+
+	default:
+		/* ESTABLISHED */
+		NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED ||
+			     ctinfo == IP_CT_ESTABLISHED_REPLY);
+	}
+
+	return nf_nat_packet(ct, ctinfo, hooknum, skb);
+}
+
+static unsigned int
+nf_nat_ipv4_in(unsigned int hooknum,
+	       struct sk_buff *skb,
+	       const struct net_device *in,
+	       const struct net_device *out,
+	       int (*okfn)(struct sk_buff *))
+{
+	unsigned int ret;
+	__be32 daddr = ip_hdr(skb)->daddr;
+
+	ret = nf_nat_ipv4_fn(hooknum, skb, in, out, okfn);
+	if (ret != NF_DROP && ret != NF_STOLEN &&
+	    daddr != ip_hdr(skb)->daddr)
+		skb_dst_drop(skb);
+
+	return ret;
+}
+
+static unsigned int
+nf_nat_ipv4_out(unsigned int hooknum,
+		struct sk_buff *skb,
+		const struct net_device *in,
+		const struct net_device *out,
+		int (*okfn)(struct sk_buff *))
+{
+#ifdef CONFIG_XFRM
+	const struct nf_conn *ct;
+	enum ip_conntrack_info ctinfo;
+#endif
+	unsigned int ret;
+
+	/* root is playing with raw sockets. */
+	if (skb->len < sizeof(struct iphdr) ||
+	    ip_hdrlen(skb) < sizeof(struct iphdr))
+		return NF_ACCEPT;
+
+	ret = nf_nat_ipv4_fn(hooknum, skb, in, out, okfn);
+#ifdef CONFIG_XFRM
+	if (ret != NF_DROP && ret != NF_STOLEN &&
+	    !(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) &&
+	    (ct = nf_ct_get(skb, &ctinfo)) != NULL) {
+		enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+
+		if ((ct->tuplehash[dir].tuple.src.u3.ip !=
+		     ct->tuplehash[!dir].tuple.dst.u3.ip) ||
+		    (ct->tuplehash[dir].tuple.src.u.all !=
+		     ct->tuplehash[!dir].tuple.dst.u.all))
+			if (nf_xfrm_me_harder(skb, AF_INET) < 0)
+				ret = NF_DROP;
+	}
+#endif
+	return ret;
+}
+
+static unsigned int
+nf_nat_ipv4_local_fn(unsigned int hooknum,
+		     struct sk_buff *skb,
+		     const struct net_device *in,
+		     const struct net_device *out,
+		     int (*okfn)(struct sk_buff *))
+{
+	const struct nf_conn *ct;
+	enum ip_conntrack_info ctinfo;
+	unsigned int ret;
+
+	/* root is playing with raw sockets. */
+	if (skb->len < sizeof(struct iphdr) ||
+	    ip_hdrlen(skb) < sizeof(struct iphdr))
+		return NF_ACCEPT;
+
+	ret = nf_nat_ipv4_fn(hooknum, skb, in, out, okfn);
+	if (ret != NF_DROP && ret != NF_STOLEN &&
+	    (ct = nf_ct_get(skb, &ctinfo)) != NULL) {
+		enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+
+		if (ct->tuplehash[dir].tuple.dst.u3.ip !=
+		    ct->tuplehash[!dir].tuple.src.u3.ip) {
+			if (ip_route_me_harder(skb, RTN_UNSPEC))
+				ret = NF_DROP;
+		}
+#ifdef CONFIG_XFRM
+		else if (!(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) &&
+			 ct->tuplehash[dir].tuple.dst.u.all !=
+			 ct->tuplehash[!dir].tuple.src.u.all)
+			if (nf_xfrm_me_harder(skb, AF_INET) < 0)
+				ret = NF_DROP;
+#endif
+	}
+	return ret;
+}
+
+static struct nf_hook_ops nf_nat_ipv4_ops[] __read_mostly = {
+	/* Before packet filtering, change destination */
+	{
+		.hook		= nf_nat_ipv4_in,
+		.owner		= THIS_MODULE,
+		.pf		= NFPROTO_IPV4,
+		.hooknum	= NF_INET_PRE_ROUTING,
+		.priority	= NF_IP_PRI_NAT_DST,
+	},
+	/* After packet filtering, change source */
+	{
+		.hook		= nf_nat_ipv4_out,
+		.owner		= THIS_MODULE,
+		.pf		= NFPROTO_IPV4,
+		.hooknum	= NF_INET_POST_ROUTING,
+		.priority	= NF_IP_PRI_NAT_SRC,
+	},
+	/* Before packet filtering, change destination */
+	{
+		.hook		= nf_nat_ipv4_local_fn,
+		.owner		= THIS_MODULE,
+		.pf		= NFPROTO_IPV4,
+		.hooknum	= NF_INET_LOCAL_OUT,
+		.priority	= NF_IP_PRI_NAT_DST,
+	},
+	/* After packet filtering, change source */
+	{
+		.hook		= nf_nat_ipv4_fn,
+		.owner		= THIS_MODULE,
+		.pf		= NFPROTO_IPV4,
+		.hooknum	= NF_INET_LOCAL_IN,
+		.priority	= NF_IP_PRI_NAT_SRC,
+	},
+};
+
+static int __net_init iptable_nat_net_init(struct net *net)
+{
+	struct ipt_replace *repl;
+
+	repl = ipt_alloc_initial_table(&nf_nat_ipv4_table);
+	if (repl == NULL)
+		return -ENOMEM;
+	net->ipv4.nat_table = ipt_register_table(net, &nf_nat_ipv4_table, repl);
+	kfree(repl);
+	if (IS_ERR(net->ipv4.nat_table))
+		return PTR_ERR(net->ipv4.nat_table);
+	return 0;
+}
+
+static void __net_exit iptable_nat_net_exit(struct net *net)
+{
+	ipt_unregister_table(net, net->ipv4.nat_table);
+}
+
+static struct pernet_operations iptable_nat_net_ops = {
+	.init	= iptable_nat_net_init,
+	.exit	= iptable_nat_net_exit,
+};
+
+static int __init iptable_nat_init(void)
+{
+	int err;
+
+	err = register_pernet_subsys(&iptable_nat_net_ops);
+	if (err < 0)
+		goto err1;
+
+	err = nf_register_hooks(nf_nat_ipv4_ops, ARRAY_SIZE(nf_nat_ipv4_ops));
+	if (err < 0)
+		goto err2;
+	return 0;
+
+err2:
+	unregister_pernet_subsys(&iptable_nat_net_ops);
+err1:
+	return err;
+}
+
+static void __exit iptable_nat_exit(void)
+{
+	nf_unregister_hooks(nf_nat_ipv4_ops, ARRAY_SIZE(nf_nat_ipv4_ops));
+	unregister_pernet_subsys(&iptable_nat_net_ops);
+}
+
+module_init(iptable_nat_init);
+module_exit(iptable_nat_exit);
+
+MODULE_LICENSE("GPL");
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index 4ada3295d9a7..fcdd0c2406e6 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -29,12 +29,6 @@
 #include <net/netfilter/ipv4/nf_defrag_ipv4.h>
 #include <net/netfilter/nf_log.h>
 
-int (*nf_nat_seq_adjust_hook)(struct sk_buff *skb,
-			      struct nf_conn *ct,
-			      enum ip_conntrack_info ctinfo,
-			      unsigned int protoff);
-EXPORT_SYMBOL_GPL(nf_nat_seq_adjust_hook);
-
 static bool ipv4_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff,
 			      struct nf_conntrack_tuple *tuple)
 {
diff --git a/net/ipv4/netfilter/nf_nat_amanda.c b/net/ipv4/netfilter/nf_nat_amanda.c
index 75464b62f5f2..42d337881171 100644
--- a/net/ipv4/netfilter/nf_nat_amanda.c
+++ b/net/ipv4/netfilter/nf_nat_amanda.c
@@ -16,7 +16,6 @@
 #include <net/netfilter/nf_conntrack_helper.h>
 #include <net/netfilter/nf_conntrack_expect.h>
 #include <net/netfilter/nf_nat_helper.h>
-#include <net/netfilter/nf_nat_rule.h>
 #include <linux/netfilter/nf_conntrack_amanda.h>
 
 MODULE_AUTHOR("Brian J. Murrell <netfilter@interlinx.bc.ca>");
diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c
deleted file mode 100644
index 44b082fd48ab..000000000000
--- a/net/ipv4/netfilter/nf_nat_core.c
+++ /dev/null
@@ -1,763 +0,0 @@
-/* NAT for netfilter; shared with compatibility layer. */
-
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/timer.h>
-#include <linux/skbuff.h>
-#include <linux/gfp.h>
-#include <net/checksum.h>
-#include <net/icmp.h>
-#include <net/ip.h>
-#include <net/tcp.h>  /* For tcp_prot in getorigdst */
-#include <linux/icmp.h>
-#include <linux/udp.h>
-#include <linux/jhash.h>
-
-#include <linux/netfilter_ipv4.h>
-#include <net/netfilter/nf_conntrack.h>
-#include <net/netfilter/nf_conntrack_core.h>
-#include <net/netfilter/nf_nat.h>
-#include <net/netfilter/nf_nat_protocol.h>
-#include <net/netfilter/nf_nat_core.h>
-#include <net/netfilter/nf_nat_helper.h>
-#include <net/netfilter/nf_conntrack_helper.h>
-#include <net/netfilter/nf_conntrack_l3proto.h>
-#include <net/netfilter/nf_conntrack_zones.h>
-
-static DEFINE_SPINLOCK(nf_nat_lock);
-
-static struct nf_conntrack_l3proto *l3proto __read_mostly;
-
-#define MAX_IP_NAT_PROTO 256
-static const struct nf_nat_protocol __rcu *nf_nat_protos[MAX_IP_NAT_PROTO]
-						__read_mostly;
-
-static inline const struct nf_nat_protocol *
-__nf_nat_proto_find(u_int8_t protonum)
-{
-	return rcu_dereference(nf_nat_protos[protonum]);
-}
-
-/* We keep an extra hash for each conntrack, for fast searching. */
-static inline unsigned int
-hash_by_src(const struct net *net, u16 zone,
-	    const struct nf_conntrack_tuple *tuple)
-{
-	unsigned int hash;
-
-	/* Original src, to ensure we map it consistently if poss. */
-	hash = jhash_3words((__force u32)tuple->src.u3.ip,
-			    (__force u32)tuple->src.u.all ^ zone,
-			    tuple->dst.protonum, nf_conntrack_hash_rnd);
-	return ((u64)hash * net->ipv4.nat_htable_size) >> 32;
-}
-
-/* Is this tuple already taken? (not by us) */
-int
-nf_nat_used_tuple(const struct nf_conntrack_tuple *tuple,
-		  const struct nf_conn *ignored_conntrack)
-{
-	/* Conntrack tracking doesn't keep track of outgoing tuples; only
-	   incoming ones.  NAT means they don't have a fixed mapping,
-	   so we invert the tuple and look for the incoming reply.
-
-	   We could keep a separate hash if this proves too slow. */
-	struct nf_conntrack_tuple reply;
-
-	nf_ct_invert_tuplepr(&reply, tuple);
-	return nf_conntrack_tuple_taken(&reply, ignored_conntrack);
-}
-EXPORT_SYMBOL(nf_nat_used_tuple);
-
-/* If we source map this tuple so reply looks like reply_tuple, will
- * that meet the constraints of range. */
-static int
-in_range(const struct nf_conntrack_tuple *tuple,
-	 const struct nf_nat_ipv4_range *range)
-{
-	const struct nf_nat_protocol *proto;
-	int ret = 0;
-
-	/* If we are supposed to map IPs, then we must be in the
-	   range specified, otherwise let this drag us onto a new src IP. */
-	if (range->flags & NF_NAT_RANGE_MAP_IPS) {
-		if (ntohl(tuple->src.u3.ip) < ntohl(range->min_ip) ||
-		    ntohl(tuple->src.u3.ip) > ntohl(range->max_ip))
-			return 0;
-	}
-
-	rcu_read_lock();
-	proto = __nf_nat_proto_find(tuple->dst.protonum);
-	if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED) ||
-	    proto->in_range(tuple, NF_NAT_MANIP_SRC,
-			    &range->min, &range->max))
-		ret = 1;
-	rcu_read_unlock();
-
-	return ret;
-}
-
-static inline int
-same_src(const struct nf_conn *ct,
-	 const struct nf_conntrack_tuple *tuple)
-{
-	const struct nf_conntrack_tuple *t;
-
-	t = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
-	return (t->dst.protonum == tuple->dst.protonum &&
-		t->src.u3.ip == tuple->src.u3.ip &&
-		t->src.u.all == tuple->src.u.all);
-}
-
-/* Only called for SRC manip */
-static int
-find_appropriate_src(struct net *net, u16 zone,
-		     const struct nf_conntrack_tuple *tuple,
-		     struct nf_conntrack_tuple *result,
-		     const struct nf_nat_ipv4_range *range)
-{
-	unsigned int h = hash_by_src(net, zone, tuple);
-	const struct nf_conn_nat *nat;
-	const struct nf_conn *ct;
-	const struct hlist_node *n;
-
-	rcu_read_lock();
-	hlist_for_each_entry_rcu(nat, n, &net->ipv4.nat_bysource[h], bysource) {
-		ct = nat->ct;
-		if (same_src(ct, tuple) && nf_ct_zone(ct) == zone) {
-			/* Copy source part from reply tuple. */
-			nf_ct_invert_tuplepr(result,
-				       &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
-			result->dst = tuple->dst;
-
-			if (in_range(result, range)) {
-				rcu_read_unlock();
-				return 1;
-			}
-		}
-	}
-	rcu_read_unlock();
-	return 0;
-}
-
-/* For [FUTURE] fragmentation handling, we want the least-used
-   src-ip/dst-ip/proto triple.  Fairness doesn't come into it.  Thus
-   if the range specifies 1.2.3.4 ports 10000-10005 and 1.2.3.5 ports
-   1-65535, we don't do pro-rata allocation based on ports; we choose
-   the ip with the lowest src-ip/dst-ip/proto usage.
-*/
-static void
-find_best_ips_proto(u16 zone, struct nf_conntrack_tuple *tuple,
-		    const struct nf_nat_ipv4_range *range,
-		    const struct nf_conn *ct,
-		    enum nf_nat_manip_type maniptype)
-{
-	__be32 *var_ipp;
-	/* Host order */
-	u_int32_t minip, maxip, j;
-
-	/* No IP mapping?  Do nothing. */
-	if (!(range->flags & NF_NAT_RANGE_MAP_IPS))
-		return;
-
-	if (maniptype == NF_NAT_MANIP_SRC)
-		var_ipp = &tuple->src.u3.ip;
-	else
-		var_ipp = &tuple->dst.u3.ip;
-
-	/* Fast path: only one choice. */
-	if (range->min_ip == range->max_ip) {
-		*var_ipp = range->min_ip;
-		return;
-	}
-
-	/* Hashing source and destination IPs gives a fairly even
-	 * spread in practice (if there are a small number of IPs
-	 * involved, there usually aren't that many connections
-	 * anyway).  The consistency means that servers see the same
-	 * client coming from the same IP (some Internet Banking sites
-	 * like this), even across reboots. */
-	minip = ntohl(range->min_ip);
-	maxip = ntohl(range->max_ip);
-	j = jhash_2words((__force u32)tuple->src.u3.ip,
-			 range->flags & NF_NAT_RANGE_PERSISTENT ?
-				0 : (__force u32)tuple->dst.u3.ip ^ zone, 0);
-	j = ((u64)j * (maxip - minip + 1)) >> 32;
-	*var_ipp = htonl(minip + j);
-}
-
-/* Manipulate the tuple into the range given.  For NF_INET_POST_ROUTING,
- * we change the source to map into the range.  For NF_INET_PRE_ROUTING
- * and NF_INET_LOCAL_OUT, we change the destination to map into the
- * range.  It might not be possible to get a unique tuple, but we try.
- * At worst (or if we race), we will end up with a final duplicate in
- * __ip_conntrack_confirm and drop the packet. */
-static void
-get_unique_tuple(struct nf_conntrack_tuple *tuple,
-		 const struct nf_conntrack_tuple *orig_tuple,
-		 const struct nf_nat_ipv4_range *range,
-		 struct nf_conn *ct,
-		 enum nf_nat_manip_type maniptype)
-{
-	struct net *net = nf_ct_net(ct);
-	const struct nf_nat_protocol *proto;
-	u16 zone = nf_ct_zone(ct);
-
-	/* 1) If this srcip/proto/src-proto-part is currently mapped,
-	   and that same mapping gives a unique tuple within the given
-	   range, use that.
-
-	   This is only required for source (ie. NAT/masq) mappings.
-	   So far, we don't do local source mappings, so multiple
-	   manips not an issue.  */
-	if (maniptype == NF_NAT_MANIP_SRC &&
-	    !(range->flags & NF_NAT_RANGE_PROTO_RANDOM)) {
-		/* try the original tuple first */
-		if (in_range(orig_tuple, range)) {
-			if (!nf_nat_used_tuple(orig_tuple, ct)) {
-				*tuple = *orig_tuple;
-				return;
-			}
-		} else if (find_appropriate_src(net, zone, orig_tuple, tuple,
-			   range)) {
-			pr_debug("get_unique_tuple: Found current src map\n");
-			if (!nf_nat_used_tuple(tuple, ct))
-				return;
-		}
-	}
-
-	/* 2) Select the least-used IP/proto combination in the given
-	   range. */
-	*tuple = *orig_tuple;
-	find_best_ips_proto(zone, tuple, range, ct, maniptype);
-
-	/* 3) The per-protocol part of the manip is made to map into
-	   the range to make a unique tuple. */
-
-	rcu_read_lock();
-	proto = __nf_nat_proto_find(orig_tuple->dst.protonum);
-
-	/* Only bother mapping if it's not already in range and unique */
-	if (!(range->flags & NF_NAT_RANGE_PROTO_RANDOM)) {
-		if (range->flags & NF_NAT_RANGE_PROTO_SPECIFIED) {
-			if (proto->in_range(tuple, maniptype, &range->min,
-					    &range->max) &&
-			    (range->min.all == range->max.all ||
-			     !nf_nat_used_tuple(tuple, ct)))
-				goto out;
-		} else if (!nf_nat_used_tuple(tuple, ct)) {
-			goto out;
-		}
-	}
-
-	/* Last change: get protocol to try to obtain unique tuple. */
-	proto->unique_tuple(tuple, range, maniptype, ct);
-out:
-	rcu_read_unlock();
-}
-
-unsigned int
-nf_nat_setup_info(struct nf_conn *ct,
-		  const struct nf_nat_ipv4_range *range,
-		  enum nf_nat_manip_type maniptype)
-{
-	struct net *net = nf_ct_net(ct);
-	struct nf_conntrack_tuple curr_tuple, new_tuple;
-	struct nf_conn_nat *nat;
-
-	/* nat helper or nfctnetlink also setup binding */
-	nat = nfct_nat(ct);
-	if (!nat) {
-		nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC);
-		if (nat == NULL) {
-			pr_debug("failed to add NAT extension\n");
-			return NF_ACCEPT;
-		}
-	}
-
-	NF_CT_ASSERT(maniptype == NF_NAT_MANIP_SRC ||
-		     maniptype == NF_NAT_MANIP_DST);
-	BUG_ON(nf_nat_initialized(ct, maniptype));
-
-	/* What we've got will look like inverse of reply. Normally
-	   this is what is in the conntrack, except for prior
-	   manipulations (future optimization: if num_manips == 0,
-	   orig_tp =
-	   conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple) */
-	nf_ct_invert_tuplepr(&curr_tuple,
-			     &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
-
-	get_unique_tuple(&new_tuple, &curr_tuple, range, ct, maniptype);
-
-	if (!nf_ct_tuple_equal(&new_tuple, &curr_tuple)) {
-		struct nf_conntrack_tuple reply;
-
-		/* Alter conntrack table so will recognize replies. */
-		nf_ct_invert_tuplepr(&reply, &new_tuple);
-		nf_conntrack_alter_reply(ct, &reply);
-
-		/* Non-atomic: we own this at the moment. */
-		if (maniptype == NF_NAT_MANIP_SRC)
-			ct->status |= IPS_SRC_NAT;
-		else
-			ct->status |= IPS_DST_NAT;
-	}
-
-	if (maniptype == NF_NAT_MANIP_SRC) {
-		unsigned int srchash;
-
-		srchash = hash_by_src(net, nf_ct_zone(ct),
-				      &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
-		spin_lock_bh(&nf_nat_lock);
-		/* nf_conntrack_alter_reply might re-allocate extension area */
-		nat = nfct_nat(ct);
-		nat->ct = ct;
-		hlist_add_head_rcu(&nat->bysource,
-				   &net->ipv4.nat_bysource[srchash]);
-		spin_unlock_bh(&nf_nat_lock);
-	}
-
-	/* It's done. */
-	if (maniptype == NF_NAT_MANIP_DST)
-		ct->status |= IPS_DST_NAT_DONE;
-	else
-		ct->status |= IPS_SRC_NAT_DONE;
-
-	return NF_ACCEPT;
-}
-EXPORT_SYMBOL(nf_nat_setup_info);
-
-/* Returns true if succeeded. */
-static bool
-manip_pkt(u_int16_t proto,
-	  struct sk_buff *skb,
-	  unsigned int iphdroff,
-	  const struct nf_conntrack_tuple *target,
-	  enum nf_nat_manip_type maniptype)
-{
-	struct iphdr *iph;
-	const struct nf_nat_protocol *p;
-
-	if (!skb_make_writable(skb, iphdroff + sizeof(*iph)))
-		return false;
-
-	iph = (void *)skb->data + iphdroff;
-
-	/* Manipulate protcol part. */
-
-	/* rcu_read_lock()ed by nf_hook_slow */
-	p = __nf_nat_proto_find(proto);
-	if (!p->manip_pkt(skb, iphdroff, target, maniptype))
-		return false;
-
-	iph = (void *)skb->data + iphdroff;
-
-	if (maniptype == NF_NAT_MANIP_SRC) {
-		csum_replace4(&iph->check, iph->saddr, target->src.u3.ip);
-		iph->saddr = target->src.u3.ip;
-	} else {
-		csum_replace4(&iph->check, iph->daddr, target->dst.u3.ip);
-		iph->daddr = target->dst.u3.ip;
-	}
-	return true;
-}
-
-/* Do packet manipulations according to nf_nat_setup_info. */
-unsigned int nf_nat_packet(struct nf_conn *ct,
-			   enum ip_conntrack_info ctinfo,
-			   unsigned int hooknum,
-			   struct sk_buff *skb)
-{
-	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
-	unsigned long statusbit;
-	enum nf_nat_manip_type mtype = HOOK2MANIP(hooknum);
-
-	if (mtype == NF_NAT_MANIP_SRC)
-		statusbit = IPS_SRC_NAT;
-	else
-		statusbit = IPS_DST_NAT;
-
-	/* Invert if this is reply dir. */
-	if (dir == IP_CT_DIR_REPLY)
-		statusbit ^= IPS_NAT_MASK;
-
-	/* Non-atomic: these bits don't change. */
-	if (ct->status & statusbit) {
-		struct nf_conntrack_tuple target;
-
-		/* We are aiming to look like inverse of other direction. */
-		nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple);
-
-		if (!manip_pkt(target.dst.protonum, skb, 0, &target, mtype))
-			return NF_DROP;
-	}
-	return NF_ACCEPT;
-}
-EXPORT_SYMBOL_GPL(nf_nat_packet);
-
-/* Dir is direction ICMP is coming from (opposite to packet it contains) */
-int nf_nat_icmp_reply_translation(struct nf_conn *ct,
-				  enum ip_conntrack_info ctinfo,
-				  unsigned int hooknum,
-				  struct sk_buff *skb)
-{
-	struct {
-		struct icmphdr icmp;
-		struct iphdr ip;
-	} *inside;
-	struct nf_conntrack_tuple target;
-	int hdrlen = ip_hdrlen(skb);
-	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
-	unsigned long statusbit;
-	enum nf_nat_manip_type manip = HOOK2MANIP(hooknum);
-
-	if (!skb_make_writable(skb, hdrlen + sizeof(*inside)))
-		return 0;
-
-	inside = (void *)skb->data + hdrlen;
-
-	/* We're actually going to mangle it beyond trivial checksum
-	   adjustment, so make sure the current checksum is correct. */
-	if (nf_ip_checksum(skb, hooknum, hdrlen, 0))
-		return 0;
-
-	/* Must be RELATED */
-	NF_CT_ASSERT(skb->nfctinfo == IP_CT_RELATED ||
-		     skb->nfctinfo == IP_CT_RELATED_REPLY);
-
-	/* Redirects on non-null nats must be dropped, else they'll
-	   start talking to each other without our translation, and be
-	   confused... --RR */
-	if (inside->icmp.type == ICMP_REDIRECT) {
-		/* If NAT isn't finished, assume it and drop. */
-		if ((ct->status & IPS_NAT_DONE_MASK) != IPS_NAT_DONE_MASK)
-			return 0;
-
-		if (ct->status & IPS_NAT_MASK)
-			return 0;
-	}
-
-	if (manip == NF_NAT_MANIP_SRC)
-		statusbit = IPS_SRC_NAT;
-	else
-		statusbit = IPS_DST_NAT;
-
-	/* Invert if this is reply dir. */
-	if (dir == IP_CT_DIR_REPLY)
-		statusbit ^= IPS_NAT_MASK;
-
-	if (!(ct->status & statusbit))
-		return 1;
-
-	pr_debug("icmp_reply_translation: translating error %p manip %u "
-		 "dir %s\n", skb, manip,
-		 dir == IP_CT_DIR_ORIGINAL ? "ORIG" : "REPLY");
-
-	/* Change inner back to look like incoming packet.  We do the
-	   opposite manip on this hook to normal, because it might not
-	   pass all hooks (locally-generated ICMP).  Consider incoming
-	   packet: PREROUTING (DST manip), routing produces ICMP, goes
-	   through POSTROUTING (which must correct the DST manip). */
-	if (!manip_pkt(inside->ip.protocol, skb, hdrlen + sizeof(inside->icmp),
-		       &ct->tuplehash[!dir].tuple, !manip))
-		return 0;
-
-	if (skb->ip_summed != CHECKSUM_PARTIAL) {
-		/* Reloading "inside" here since manip_pkt inner. */
-		inside = (void *)skb->data + hdrlen;
-		inside->icmp.checksum = 0;
-		inside->icmp.checksum =
-			csum_fold(skb_checksum(skb, hdrlen,
-					       skb->len - hdrlen, 0));
-	}
-
-	/* Change outer to look the reply to an incoming packet
-	 * (proto 0 means don't invert per-proto part). */
-	nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple);
-	if (!manip_pkt(0, skb, 0, &target, manip))
-		return 0;
-
-	return 1;
-}
-EXPORT_SYMBOL_GPL(nf_nat_icmp_reply_translation);
-
-/* Protocol registration. */
-int nf_nat_protocol_register(const struct nf_nat_protocol *proto)
-{
-	int ret = 0;
-
-	spin_lock_bh(&nf_nat_lock);
-	if (rcu_dereference_protected(
-			nf_nat_protos[proto->protonum],
-			lockdep_is_held(&nf_nat_lock)
-			) != &nf_nat_unknown_protocol) {
-		ret = -EBUSY;
-		goto out;
-	}
-	RCU_INIT_POINTER(nf_nat_protos[proto->protonum], proto);
- out:
-	spin_unlock_bh(&nf_nat_lock);
-	return ret;
-}
-EXPORT_SYMBOL(nf_nat_protocol_register);
-
-/* No one stores the protocol anywhere; simply delete it. */
-void nf_nat_protocol_unregister(const struct nf_nat_protocol *proto)
-{
-	spin_lock_bh(&nf_nat_lock);
-	RCU_INIT_POINTER(nf_nat_protos[proto->protonum],
-			   &nf_nat_unknown_protocol);
-	spin_unlock_bh(&nf_nat_lock);
-	synchronize_rcu();
-}
-EXPORT_SYMBOL(nf_nat_protocol_unregister);
-
-/* No one using conntrack by the time this called. */
-static void nf_nat_cleanup_conntrack(struct nf_conn *ct)
-{
-	struct nf_conn_nat *nat = nf_ct_ext_find(ct, NF_CT_EXT_NAT);
-
-	if (nat == NULL || nat->ct == NULL)
-		return;
-
-	NF_CT_ASSERT(nat->ct->status & IPS_SRC_NAT_DONE);
-
-	spin_lock_bh(&nf_nat_lock);
-	hlist_del_rcu(&nat->bysource);
-	spin_unlock_bh(&nf_nat_lock);
-}
-
-static void nf_nat_move_storage(void *new, void *old)
-{
-	struct nf_conn_nat *new_nat = new;
-	struct nf_conn_nat *old_nat = old;
-	struct nf_conn *ct = old_nat->ct;
-
-	if (!ct || !(ct->status & IPS_SRC_NAT_DONE))
-		return;
-
-	spin_lock_bh(&nf_nat_lock);
-	hlist_replace_rcu(&old_nat->bysource, &new_nat->bysource);
-	spin_unlock_bh(&nf_nat_lock);
-}
-
-static struct nf_ct_ext_type nat_extend __read_mostly = {
-	.len		= sizeof(struct nf_conn_nat),
-	.align		= __alignof__(struct nf_conn_nat),
-	.destroy	= nf_nat_cleanup_conntrack,
-	.move		= nf_nat_move_storage,
-	.id		= NF_CT_EXT_NAT,
-	.flags		= NF_CT_EXT_F_PREALLOC,
-};
-
-#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
-
-#include <linux/netfilter/nfnetlink.h>
-#include <linux/netfilter/nfnetlink_conntrack.h>
-
-static const struct nla_policy protonat_nla_policy[CTA_PROTONAT_MAX+1] = {
-	[CTA_PROTONAT_PORT_MIN]	= { .type = NLA_U16 },
-	[CTA_PROTONAT_PORT_MAX]	= { .type = NLA_U16 },
-};
-
-static int nfnetlink_parse_nat_proto(struct nlattr *attr,
-				     const struct nf_conn *ct,
-				     struct nf_nat_ipv4_range *range)
-{
-	struct nlattr *tb[CTA_PROTONAT_MAX+1];
-	const struct nf_nat_protocol *npt;
-	int err;
-
-	err = nla_parse_nested(tb, CTA_PROTONAT_MAX, attr, protonat_nla_policy);
-	if (err < 0)
-		return err;
-
-	rcu_read_lock();
-	npt = __nf_nat_proto_find(nf_ct_protonum(ct));
-	if (npt->nlattr_to_range)
-		err = npt->nlattr_to_range(tb, range);
-	rcu_read_unlock();
-	return err;
-}
-
-static const struct nla_policy nat_nla_policy[CTA_NAT_MAX+1] = {
-	[CTA_NAT_MINIP]		= { .type = NLA_U32 },
-	[CTA_NAT_MAXIP]		= { .type = NLA_U32 },
-	[CTA_NAT_PROTO]		= { .type = NLA_NESTED },
-};
-
-static int
-nfnetlink_parse_nat(const struct nlattr *nat,
-		    const struct nf_conn *ct, struct nf_nat_ipv4_range *range)
-{
-	struct nlattr *tb[CTA_NAT_MAX+1];
-	int err;
-
-	memset(range, 0, sizeof(*range));
-
-	err = nla_parse_nested(tb, CTA_NAT_MAX, nat, nat_nla_policy);
-	if (err < 0)
-		return err;
-
-	if (tb[CTA_NAT_MINIP])
-		range->min_ip = nla_get_be32(tb[CTA_NAT_MINIP]);
-
-	if (!tb[CTA_NAT_MAXIP])
-		range->max_ip = range->min_ip;
-	else
-		range->max_ip = nla_get_be32(tb[CTA_NAT_MAXIP]);
-
-	if (range->min_ip)
-		range->flags |= NF_NAT_RANGE_MAP_IPS;
-
-	if (!tb[CTA_NAT_PROTO])
-		return 0;
-
-	err = nfnetlink_parse_nat_proto(tb[CTA_NAT_PROTO], ct, range);
-	if (err < 0)
-		return err;
-
-	return 0;
-}
-
-static int
-nfnetlink_parse_nat_setup(struct nf_conn *ct,
-			  enum nf_nat_manip_type manip,
-			  const struct nlattr *attr)
-{
-	struct nf_nat_ipv4_range range;
-
-	if (nfnetlink_parse_nat(attr, ct, &range) < 0)
-		return -EINVAL;
-	if (nf_nat_initialized(ct, manip))
-		return -EEXIST;
-
-	return nf_nat_setup_info(ct, &range, manip);
-}
-#else
-static int
-nfnetlink_parse_nat_setup(struct nf_conn *ct,
-			  enum nf_nat_manip_type manip,
-			  const struct nlattr *attr)
-{
-	return -EOPNOTSUPP;
-}
-#endif
-
-static int __net_init nf_nat_net_init(struct net *net)
-{
-	/* Leave them the same for the moment. */
-	net->ipv4.nat_htable_size = net->ct.htable_size;
-	net->ipv4.nat_bysource = nf_ct_alloc_hashtable(&net->ipv4.nat_htable_size, 0);
-	if (!net->ipv4.nat_bysource)
-		return -ENOMEM;
-	return 0;
-}
-
-/* Clear NAT section of all conntracks, in case we're loaded again. */
-static int clean_nat(struct nf_conn *i, void *data)
-{
-	struct nf_conn_nat *nat = nfct_nat(i);
-
-	if (!nat)
-		return 0;
-	memset(nat, 0, sizeof(*nat));
-	i->status &= ~(IPS_NAT_MASK | IPS_NAT_DONE_MASK | IPS_SEQ_ADJUST);
-	return 0;
-}
-
-static void __net_exit nf_nat_net_exit(struct net *net)
-{
-	nf_ct_iterate_cleanup(net, &clean_nat, NULL);
-	synchronize_rcu();
-	nf_ct_free_hashtable(net->ipv4.nat_bysource, net->ipv4.nat_htable_size);
-}
-
-static struct pernet_operations nf_nat_net_ops = {
-	.init = nf_nat_net_init,
-	.exit = nf_nat_net_exit,
-};
-
-static struct nf_ct_helper_expectfn follow_master_nat = {
-	.name		= "nat-follow-master",
-	.expectfn	= nf_nat_follow_master,
-};
-
-static struct nfq_ct_nat_hook nfq_ct_nat = {
-	.seq_adjust	= nf_nat_tcp_seq_adjust,
-};
-
-static int __init nf_nat_init(void)
-{
-	size_t i;
-	int ret;
-
-	need_ipv4_conntrack();
-
-	ret = nf_ct_extend_register(&nat_extend);
-	if (ret < 0) {
-		printk(KERN_ERR "nf_nat_core: Unable to register extension\n");
-		return ret;
-	}
-
-	ret = register_pernet_subsys(&nf_nat_net_ops);
-	if (ret < 0)
-		goto cleanup_extend;
-
-	/* Sew in builtin protocols. */
-	spin_lock_bh(&nf_nat_lock);
-	for (i = 0; i < MAX_IP_NAT_PROTO; i++)
-		RCU_INIT_POINTER(nf_nat_protos[i], &nf_nat_unknown_protocol);
-	RCU_INIT_POINTER(nf_nat_protos[IPPROTO_TCP], &nf_nat_protocol_tcp);
-	RCU_INIT_POINTER(nf_nat_protos[IPPROTO_UDP], &nf_nat_protocol_udp);
-	RCU_INIT_POINTER(nf_nat_protos[IPPROTO_ICMP], &nf_nat_protocol_icmp);
-	spin_unlock_bh(&nf_nat_lock);
-
-	/* Initialize fake conntrack so that NAT will skip it */
-	nf_ct_untracked_status_or(IPS_NAT_DONE_MASK);
-
-	l3proto = nf_ct_l3proto_find_get((u_int16_t)AF_INET);
-
-	nf_ct_helper_expectfn_register(&follow_master_nat);
-
-	BUG_ON(nf_nat_seq_adjust_hook != NULL);
-	RCU_INIT_POINTER(nf_nat_seq_adjust_hook, nf_nat_seq_adjust);
-	BUG_ON(nfnetlink_parse_nat_setup_hook != NULL);
-	RCU_INIT_POINTER(nfnetlink_parse_nat_setup_hook,
-			   nfnetlink_parse_nat_setup);
-	BUG_ON(nf_ct_nat_offset != NULL);
-	RCU_INIT_POINTER(nf_ct_nat_offset, nf_nat_get_offset);
-	RCU_INIT_POINTER(nfq_ct_nat_hook, &nfq_ct_nat);
-	return 0;
-
- cleanup_extend:
-	nf_ct_extend_unregister(&nat_extend);
-	return ret;
-}
-
-static void __exit nf_nat_cleanup(void)
-{
-	unregister_pernet_subsys(&nf_nat_net_ops);
-	nf_ct_l3proto_put(l3proto);
-	nf_ct_extend_unregister(&nat_extend);
-	nf_ct_helper_expectfn_unregister(&follow_master_nat);
-	RCU_INIT_POINTER(nf_nat_seq_adjust_hook, NULL);
-	RCU_INIT_POINTER(nfnetlink_parse_nat_setup_hook, NULL);
-	RCU_INIT_POINTER(nf_ct_nat_offset, NULL);
-	RCU_INIT_POINTER(nfq_ct_nat_hook, NULL);
-	synchronize_net();
-}
-
-MODULE_LICENSE("GPL");
-MODULE_ALIAS("nf-nat-ipv4");
-
-module_init(nf_nat_init);
-module_exit(nf_nat_cleanup);
diff --git a/net/ipv4/netfilter/nf_nat_ftp.c b/net/ipv4/netfilter/nf_nat_ftp.c
index 5589f3af4a8e..dd5e387fc03b 100644
--- a/net/ipv4/netfilter/nf_nat_ftp.c
+++ b/net/ipv4/netfilter/nf_nat_ftp.c
@@ -15,7 +15,6 @@
 #include <linux/netfilter_ipv4.h>
 #include <net/netfilter/nf_nat.h>
 #include <net/netfilter/nf_nat_helper.h>
-#include <net/netfilter/nf_nat_rule.h>
 #include <net/netfilter/nf_conntrack_helper.h>
 #include <net/netfilter/nf_conntrack_expect.h>
 #include <linux/netfilter/nf_conntrack_ftp.h>
diff --git a/net/ipv4/netfilter/nf_nat_h323.c b/net/ipv4/netfilter/nf_nat_h323.c
index d2c228db38b5..9c3db10b22d3 100644
--- a/net/ipv4/netfilter/nf_nat_h323.c
+++ b/net/ipv4/netfilter/nf_nat_h323.c
@@ -15,7 +15,6 @@
 
 #include <net/netfilter/nf_nat.h>
 #include <net/netfilter/nf_nat_helper.h>
-#include <net/netfilter/nf_nat_rule.h>
 #include <net/netfilter/nf_conntrack_helper.h>
 #include <net/netfilter/nf_conntrack_expect.h>
 #include <linux/netfilter/nf_conntrack_h323.h>
@@ -392,7 +391,7 @@ static int nat_h245(struct sk_buff *skb, struct nf_conn *ct,
 static void ip_nat_q931_expect(struct nf_conn *new,
 			       struct nf_conntrack_expect *this)
 {
-	struct nf_nat_ipv4_range range;
+	struct nf_nat_range range;
 
 	if (this->tuple.src.u3.ip != 0) {	/* Only accept calls from GK */
 		nf_nat_follow_master(new, this);
@@ -404,14 +403,15 @@ static void ip_nat_q931_expect(struct nf_conn *new,
 
 	/* Change src to where master sends to */
 	range.flags = NF_NAT_RANGE_MAP_IPS;
-	range.min_ip = range.max_ip = new->tuplehash[!this->dir].tuple.src.u3.ip;
+	range.min_addr = range.max_addr =
+	    new->tuplehash[!this->dir].tuple.src.u3;
 	nf_nat_setup_info(new, &range, NF_NAT_MANIP_SRC);
 
 	/* For DST manip, map port here to where it's expected. */
 	range.flags = (NF_NAT_RANGE_MAP_IPS | NF_NAT_RANGE_PROTO_SPECIFIED);
-	range.min = range.max = this->saved_proto;
-	range.min_ip = range.max_ip =
-	    new->master->tuplehash[!this->dir].tuple.src.u3.ip;
+	range.min_proto = range.max_proto = this->saved_proto;
+	range.min_addr = range.max_addr =
+	    new->master->tuplehash[!this->dir].tuple.src.u3;
 	nf_nat_setup_info(new, &range, NF_NAT_MANIP_DST);
 }
 
@@ -490,20 +490,21 @@ static int nat_q931(struct sk_buff *skb, struct nf_conn *ct,
 static void ip_nat_callforwarding_expect(struct nf_conn *new,
 					 struct nf_conntrack_expect *this)
 {
-	struct nf_nat_ipv4_range range;
+	struct nf_nat_range range;
 
 	/* This must be a fresh one. */
 	BUG_ON(new->status & IPS_NAT_DONE_MASK);
 
 	/* Change src to where master sends to */
 	range.flags = NF_NAT_RANGE_MAP_IPS;
-	range.min_ip = range.max_ip = new->tuplehash[!this->dir].tuple.src.u3.ip;
+	range.min_addr = range.max_addr =
+	    new->tuplehash[!this->dir].tuple.src.u3;
 	nf_nat_setup_info(new, &range, NF_NAT_MANIP_SRC);
 
 	/* For DST manip, map port here to where it's expected. */
 	range.flags = (NF_NAT_RANGE_MAP_IPS | NF_NAT_RANGE_PROTO_SPECIFIED);
-	range.min = range.max = this->saved_proto;
-	range.min_ip = range.max_ip = this->saved_ip;
+	range.min_proto = range.max_proto = this->saved_proto;
+	range.min_addr = range.max_addr = this->saved_addr;
 	nf_nat_setup_info(new, &range, NF_NAT_MANIP_DST);
 }
 
@@ -519,7 +520,7 @@ static int nat_callforwarding(struct sk_buff *skb, struct nf_conn *ct,
 	u_int16_t nated_port;
 
 	/* Set expectations for NAT */
-	exp->saved_ip = exp->tuple.dst.u3.ip;
+	exp->saved_addr = exp->tuple.dst.u3;
 	exp->tuple.dst.u3.ip = ct->tuplehash[!dir].tuple.dst.u3.ip;
 	exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port;
 	exp->expectfn = ip_nat_callforwarding_expect;
diff --git a/net/ipv4/netfilter/nf_nat_helper.c b/net/ipv4/netfilter/nf_nat_helper.c
deleted file mode 100644
index 2fefec5e757c..000000000000
--- a/net/ipv4/netfilter/nf_nat_helper.c
+++ /dev/null
@@ -1,461 +0,0 @@
-/* ip_nat_helper.c - generic support functions for NAT helpers
- *
- * (C) 2000-2002 Harald Welte <laforge@netfilter.org>
- * (C) 2003-2006 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-#include <linux/module.h>
-#include <linux/gfp.h>
-#include <linux/kmod.h>
-#include <linux/types.h>
-#include <linux/timer.h>
-#include <linux/skbuff.h>
-#include <linux/tcp.h>
-#include <linux/udp.h>
-#include <net/checksum.h>
-#include <net/tcp.h>
-#include <net/route.h>
-
-#include <linux/netfilter_ipv4.h>
-#include <net/netfilter/nf_conntrack.h>
-#include <net/netfilter/nf_conntrack_helper.h>
-#include <net/netfilter/nf_conntrack_ecache.h>
-#include <net/netfilter/nf_conntrack_expect.h>
-#include <net/netfilter/nf_nat.h>
-#include <net/netfilter/nf_nat_protocol.h>
-#include <net/netfilter/nf_nat_core.h>
-#include <net/netfilter/nf_nat_helper.h>
-
-#define DUMP_OFFSET(x) \
-	pr_debug("offset_before=%d, offset_after=%d, correction_pos=%u\n", \
-		 x->offset_before, x->offset_after, x->correction_pos);
-
-static DEFINE_SPINLOCK(nf_nat_seqofs_lock);
-
-/* Setup TCP sequence correction given this change at this sequence */
-static inline void
-adjust_tcp_sequence(u32 seq,
-		    int sizediff,
-		    struct nf_conn *ct,
-		    enum ip_conntrack_info ctinfo)
-{
-	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
-	struct nf_conn_nat *nat = nfct_nat(ct);
-	struct nf_nat_seq *this_way = &nat->seq[dir];
-
-	pr_debug("adjust_tcp_sequence: seq = %u, sizediff = %d\n",
-		 seq, sizediff);
-
-	pr_debug("adjust_tcp_sequence: Seq_offset before: ");
-	DUMP_OFFSET(this_way);
-
-	spin_lock_bh(&nf_nat_seqofs_lock);
-
-	/* SYN adjust. If it's uninitialized, or this is after last
-	 * correction, record it: we don't handle more than one
-	 * adjustment in the window, but do deal with common case of a
-	 * retransmit */
-	if (this_way->offset_before == this_way->offset_after ||
-	    before(this_way->correction_pos, seq)) {
-		this_way->correction_pos = seq;
-		this_way->offset_before = this_way->offset_after;
-		this_way->offset_after += sizediff;
-	}
-	spin_unlock_bh(&nf_nat_seqofs_lock);
-
-	pr_debug("adjust_tcp_sequence: Seq_offset after: ");
-	DUMP_OFFSET(this_way);
-}
-
-/* Get the offset value, for conntrack */
-s16 nf_nat_get_offset(const struct nf_conn *ct,
-		      enum ip_conntrack_dir dir,
-		      u32 seq)
-{
-	struct nf_conn_nat *nat = nfct_nat(ct);
-	struct nf_nat_seq *this_way;
-	s16 offset;
-
-	if (!nat)
-		return 0;
-
-	this_way = &nat->seq[dir];
-	spin_lock_bh(&nf_nat_seqofs_lock);
-	offset = after(seq, this_way->correction_pos)
-		 ? this_way->offset_after : this_way->offset_before;
-	spin_unlock_bh(&nf_nat_seqofs_lock);
-
-	return offset;
-}
-EXPORT_SYMBOL_GPL(nf_nat_get_offset);
-
-/* Frobs data inside this packet, which is linear. */
-static void mangle_contents(struct sk_buff *skb,
-			    unsigned int dataoff,
-			    unsigned int match_offset,
-			    unsigned int match_len,
-			    const char *rep_buffer,
-			    unsigned int rep_len)
-{
-	unsigned char *data;
-
-	BUG_ON(skb_is_nonlinear(skb));
-	data = skb_network_header(skb) + dataoff;
-
-	/* move post-replacement */
-	memmove(data + match_offset + rep_len,
-		data + match_offset + match_len,
-		skb->tail - (skb->network_header + dataoff +
-			     match_offset + match_len));
-
-	/* insert data from buffer */
-	memcpy(data + match_offset, rep_buffer, rep_len);
-
-	/* update skb info */
-	if (rep_len > match_len) {
-		pr_debug("nf_nat_mangle_packet: Extending packet by "
-			 "%u from %u bytes\n", rep_len - match_len, skb->len);
-		skb_put(skb, rep_len - match_len);
-	} else {
-		pr_debug("nf_nat_mangle_packet: Shrinking packet from "
-			 "%u from %u bytes\n", match_len - rep_len, skb->len);
-		__skb_trim(skb, skb->len + rep_len - match_len);
-	}
-
-	/* fix IP hdr checksum information */
-	ip_hdr(skb)->tot_len = htons(skb->len);
-	ip_send_check(ip_hdr(skb));
-}
-
-/* Unusual, but possible case. */
-static int enlarge_skb(struct sk_buff *skb, unsigned int extra)
-{
-	if (skb->len + extra > 65535)
-		return 0;
-
-	if (pskb_expand_head(skb, 0, extra - skb_tailroom(skb), GFP_ATOMIC))
-		return 0;
-
-	return 1;
-}
-
-void nf_nat_set_seq_adjust(struct nf_conn *ct, enum ip_conntrack_info ctinfo,
-			   __be32 seq, s16 off)
-{
-	if (!off)
-		return;
-	set_bit(IPS_SEQ_ADJUST_BIT, &ct->status);
-	adjust_tcp_sequence(ntohl(seq), off, ct, ctinfo);
-	nf_conntrack_event_cache(IPCT_NATSEQADJ, ct);
-}
-EXPORT_SYMBOL_GPL(nf_nat_set_seq_adjust);
-
-void nf_nat_tcp_seq_adjust(struct sk_buff *skb, struct nf_conn *ct,
-			   u32 ctinfo, int off)
-{
-	const struct tcphdr *th;
-
-	if (nf_ct_protonum(ct) != IPPROTO_TCP)
-		return;
-
-	th = (struct tcphdr *)(skb_network_header(skb)+ ip_hdrlen(skb));
-	nf_nat_set_seq_adjust(ct, ctinfo, th->seq, off);
-}
-EXPORT_SYMBOL_GPL(nf_nat_tcp_seq_adjust);
-
-static void nf_nat_csum(struct sk_buff *skb, const struct iphdr *iph, void *data,
-			int datalen, __sum16 *check, int oldlen)
-{
-	struct rtable *rt = skb_rtable(skb);
-
-	if (skb->ip_summed != CHECKSUM_PARTIAL) {
-		if (!(rt->rt_flags & RTCF_LOCAL) &&
-		    (!skb->dev || skb->dev->features & NETIF_F_V4_CSUM)) {
-			skb->ip_summed = CHECKSUM_PARTIAL;
-			skb->csum_start = skb_headroom(skb) +
-					  skb_network_offset(skb) +
-					  iph->ihl * 4;
-			skb->csum_offset = (void *)check - data;
-			*check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
-						    datalen, iph->protocol, 0);
-		} else {
-			*check = 0;
-			*check = csum_tcpudp_magic(iph->saddr, iph->daddr,
-						   datalen, iph->protocol,
-						   csum_partial(data, datalen,
-								0));
-			if (iph->protocol == IPPROTO_UDP && !*check)
-				*check = CSUM_MANGLED_0;
-		}
-	} else
-		inet_proto_csum_replace2(check, skb,
-					 htons(oldlen), htons(datalen), 1);
-}
-
-/* Generic function for mangling variable-length address changes inside
- * NATed TCP connections (like the PORT XXX,XXX,XXX,XXX,XXX,XXX
- * command in FTP).
- *
- * Takes care about all the nasty sequence number changes, checksumming,
- * skb enlargement, ...
- *
- * */
-int __nf_nat_mangle_tcp_packet(struct sk_buff *skb,
-			       struct nf_conn *ct,
-			       enum ip_conntrack_info ctinfo,
-			       unsigned int protoff,
-			       unsigned int match_offset,
-			       unsigned int match_len,
-			       const char *rep_buffer,
-			       unsigned int rep_len, bool adjust)
-{
-	struct iphdr *iph;
-	struct tcphdr *tcph;
-	int oldlen, datalen;
-
-	if (!skb_make_writable(skb, skb->len))
-		return 0;
-
-	if (rep_len > match_len &&
-	    rep_len - match_len > skb_tailroom(skb) &&
-	    !enlarge_skb(skb, rep_len - match_len))
-		return 0;
-
-	SKB_LINEAR_ASSERT(skb);
-
-	iph = ip_hdr(skb);
-	tcph = (void *)iph + iph->ihl*4;
-
-	oldlen = skb->len - iph->ihl*4;
-	mangle_contents(skb, iph->ihl*4 + tcph->doff*4,
-			match_offset, match_len, rep_buffer, rep_len);
-
-	datalen = skb->len - iph->ihl*4;
-	nf_nat_csum(skb, iph, tcph, datalen, &tcph->check, oldlen);
-
-	if (adjust && rep_len != match_len)
-		nf_nat_set_seq_adjust(ct, ctinfo, tcph->seq,
-				      (int)rep_len - (int)match_len);
-
-	return 1;
-}
-EXPORT_SYMBOL(__nf_nat_mangle_tcp_packet);
-
-/* Generic function for mangling variable-length address changes inside
- * NATed UDP connections (like the CONNECT DATA XXXXX MESG XXXXX INDEX XXXXX
- * command in the Amanda protocol)
- *
- * Takes care about all the nasty sequence number changes, checksumming,
- * skb enlargement, ...
- *
- * XXX - This function could be merged with nf_nat_mangle_tcp_packet which
- *       should be fairly easy to do.
- */
-int
-nf_nat_mangle_udp_packet(struct sk_buff *skb,
-			 struct nf_conn *ct,
-			 enum ip_conntrack_info ctinfo,
-			 unsigned int protoff,
-			 unsigned int match_offset,
-			 unsigned int match_len,
-			 const char *rep_buffer,
-			 unsigned int rep_len)
-{
-	struct iphdr *iph;
-	struct udphdr *udph;
-	int datalen, oldlen;
-
-	if (!skb_make_writable(skb, skb->len))
-		return 0;
-
-	if (rep_len > match_len &&
-	    rep_len - match_len > skb_tailroom(skb) &&
-	    !enlarge_skb(skb, rep_len - match_len))
-		return 0;
-
-	iph = ip_hdr(skb);
-	udph = (void *)iph + iph->ihl*4;
-
-	oldlen = skb->len - iph->ihl*4;
-	mangle_contents(skb, iph->ihl*4 + sizeof(*udph),
-			match_offset, match_len, rep_buffer, rep_len);
-
-	/* update the length of the UDP packet */
-	datalen = skb->len - iph->ihl*4;
-	udph->len = htons(datalen);
-
-	/* fix udp checksum if udp checksum was previously calculated */
-	if (!udph->check && skb->ip_summed != CHECKSUM_PARTIAL)
-		return 1;
-
-	nf_nat_csum(skb, iph, udph, datalen, &udph->check, oldlen);
-
-	return 1;
-}
-EXPORT_SYMBOL(nf_nat_mangle_udp_packet);
-
-/* Adjust one found SACK option including checksum correction */
-static void
-sack_adjust(struct sk_buff *skb,
-	    struct tcphdr *tcph,
-	    unsigned int sackoff,
-	    unsigned int sackend,
-	    struct nf_nat_seq *natseq)
-{
-	while (sackoff < sackend) {
-		struct tcp_sack_block_wire *sack;
-		__be32 new_start_seq, new_end_seq;
-
-		sack = (void *)skb->data + sackoff;
-		if (after(ntohl(sack->start_seq) - natseq->offset_before,
-			  natseq->correction_pos))
-			new_start_seq = htonl(ntohl(sack->start_seq)
-					- natseq->offset_after);
-		else
-			new_start_seq = htonl(ntohl(sack->start_seq)
-					- natseq->offset_before);
-
-		if (after(ntohl(sack->end_seq) - natseq->offset_before,
-			  natseq->correction_pos))
-			new_end_seq = htonl(ntohl(sack->end_seq)
-				      - natseq->offset_after);
-		else
-			new_end_seq = htonl(ntohl(sack->end_seq)
-				      - natseq->offset_before);
-
-		pr_debug("sack_adjust: start_seq: %d->%d, end_seq: %d->%d\n",
-			 ntohl(sack->start_seq), new_start_seq,
-			 ntohl(sack->end_seq), new_end_seq);
-
-		inet_proto_csum_replace4(&tcph->check, skb,
-					 sack->start_seq, new_start_seq, 0);
-		inet_proto_csum_replace4(&tcph->check, skb,
-					 sack->end_seq, new_end_seq, 0);
-		sack->start_seq = new_start_seq;
-		sack->end_seq = new_end_seq;
-		sackoff += sizeof(*sack);
-	}
-}
-
-/* TCP SACK sequence number adjustment */
-static inline unsigned int
-nf_nat_sack_adjust(struct sk_buff *skb,
-		   struct tcphdr *tcph,
-		   struct nf_conn *ct,
-		   enum ip_conntrack_info ctinfo)
-{
-	unsigned int dir, optoff, optend;
-	struct nf_conn_nat *nat = nfct_nat(ct);
-
-	optoff = ip_hdrlen(skb) + sizeof(struct tcphdr);
-	optend = ip_hdrlen(skb) + tcph->doff * 4;
-
-	if (!skb_make_writable(skb, optend))
-		return 0;
-
-	dir = CTINFO2DIR(ctinfo);
-
-	while (optoff < optend) {
-		/* Usually: option, length. */
-		unsigned char *op = skb->data + optoff;
-
-		switch (op[0]) {
-		case TCPOPT_EOL:
-			return 1;
-		case TCPOPT_NOP:
-			optoff++;
-			continue;
-		default:
-			/* no partial options */
-			if (optoff + 1 == optend ||
-			    optoff + op[1] > optend ||
-			    op[1] < 2)
-				return 0;
-			if (op[0] == TCPOPT_SACK &&
-			    op[1] >= 2+TCPOLEN_SACK_PERBLOCK &&
-			    ((op[1] - 2) % TCPOLEN_SACK_PERBLOCK) == 0)
-				sack_adjust(skb, tcph, optoff+2,
-					    optoff+op[1], &nat->seq[!dir]);
-			optoff += op[1];
-		}
-	}
-	return 1;
-}
-
-/* TCP sequence number adjustment.  Returns 1 on success, 0 on failure */
-int
-nf_nat_seq_adjust(struct sk_buff *skb,
-		  struct nf_conn *ct,
-		  enum ip_conntrack_info ctinfo,
-		  unsigned int protoff)
-{
-	struct tcphdr *tcph;
-	int dir;
-	__be32 newseq, newack;
-	s16 seqoff, ackoff;
-	struct nf_conn_nat *nat = nfct_nat(ct);
-	struct nf_nat_seq *this_way, *other_way;
-
-	dir = CTINFO2DIR(ctinfo);
-
-	this_way = &nat->seq[dir];
-	other_way = &nat->seq[!dir];
-
-	if (!skb_make_writable(skb, protoff + sizeof(*tcph)))
-		return 0;
-
-	tcph = (void *)skb->data + protoff;
-	if (after(ntohl(tcph->seq), this_way->correction_pos))
-		seqoff = this_way->offset_after;
-	else
-		seqoff = this_way->offset_before;
-
-	if (after(ntohl(tcph->ack_seq) - other_way->offset_before,
-		  other_way->correction_pos))
-		ackoff = other_way->offset_after;
-	else
-		ackoff = other_way->offset_before;
-
-	newseq = htonl(ntohl(tcph->seq) + seqoff);
-	newack = htonl(ntohl(tcph->ack_seq) - ackoff);
-
-	inet_proto_csum_replace4(&tcph->check, skb, tcph->seq, newseq, 0);
-	inet_proto_csum_replace4(&tcph->check, skb, tcph->ack_seq, newack, 0);
-
-	pr_debug("Adjusting sequence number from %u->%u, ack from %u->%u\n",
-		 ntohl(tcph->seq), ntohl(newseq), ntohl(tcph->ack_seq),
-		 ntohl(newack));
-
-	tcph->seq = newseq;
-	tcph->ack_seq = newack;
-
-	return nf_nat_sack_adjust(skb, tcph, ct, ctinfo);
-}
-
-/* Setup NAT on this expected conntrack so it follows master. */
-/* If we fail to get a free NAT slot, we'll get dropped on confirm */
-void nf_nat_follow_master(struct nf_conn *ct,
-			  struct nf_conntrack_expect *exp)
-{
-	struct nf_nat_ipv4_range range;
-
-	/* This must be a fresh one. */
-	BUG_ON(ct->status & IPS_NAT_DONE_MASK);
-
-	/* Change src to where master sends to */
-	range.flags = NF_NAT_RANGE_MAP_IPS;
-	range.min_ip = range.max_ip
-		= ct->master->tuplehash[!exp->dir].tuple.dst.u3.ip;
-	nf_nat_setup_info(ct, &range, NF_NAT_MANIP_SRC);
-
-	/* For DST manip, map port here to where it's expected. */
-	range.flags = (NF_NAT_RANGE_MAP_IPS | NF_NAT_RANGE_PROTO_SPECIFIED);
-	range.min = range.max = exp->saved_proto;
-	range.min_ip = range.max_ip
-		= ct->master->tuplehash[!exp->dir].tuple.src.u3.ip;
-	nf_nat_setup_info(ct, &range, NF_NAT_MANIP_DST);
-}
-EXPORT_SYMBOL(nf_nat_follow_master);
diff --git a/net/ipv4/netfilter/nf_nat_irc.c b/net/ipv4/netfilter/nf_nat_irc.c
index 5b0c20a1f08d..1ce37f89ec78 100644
--- a/net/ipv4/netfilter/nf_nat_irc.c
+++ b/net/ipv4/netfilter/nf_nat_irc.c
@@ -17,7 +17,6 @@
 
 #include <net/netfilter/nf_nat.h>
 #include <net/netfilter/nf_nat_helper.h>
-#include <net/netfilter/nf_nat_rule.h>
 #include <net/netfilter/nf_conntrack_helper.h>
 #include <net/netfilter/nf_conntrack_expect.h>
 #include <linux/netfilter/nf_conntrack_irc.h>
diff --git a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
new file mode 100644
index 000000000000..d8b2e14efddc
--- /dev/null
+++ b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
@@ -0,0 +1,281 @@
+/*
+ * (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
+ * (C) 2011 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/types.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+#include <linux/icmp.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
+#include <net/secure_seq.h>
+#include <net/checksum.h>
+#include <net/route.h>
+#include <net/ip.h>
+
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_nat_core.h>
+#include <net/netfilter/nf_nat_l3proto.h>
+#include <net/netfilter/nf_nat_l4proto.h>
+
+static const struct nf_nat_l3proto nf_nat_l3proto_ipv4;
+
+#ifdef CONFIG_XFRM
+static void nf_nat_ipv4_decode_session(struct sk_buff *skb,
+				       const struct nf_conn *ct,
+				       enum ip_conntrack_dir dir,
+				       unsigned long statusbit,
+				       struct flowi *fl)
+{
+	const struct nf_conntrack_tuple *t = &ct->tuplehash[dir].tuple;
+	struct flowi4 *fl4 = &fl->u.ip4;
+
+	if (ct->status & statusbit) {
+		fl4->daddr = t->dst.u3.ip;
+		if (t->dst.protonum == IPPROTO_TCP ||
+		    t->dst.protonum == IPPROTO_UDP ||
+		    t->dst.protonum == IPPROTO_UDPLITE ||
+		    t->dst.protonum == IPPROTO_DCCP ||
+		    t->dst.protonum == IPPROTO_SCTP)
+			fl4->fl4_dport = t->dst.u.all;
+	}
+
+	statusbit ^= IPS_NAT_MASK;
+
+	if (ct->status & statusbit) {
+		fl4->saddr = t->src.u3.ip;
+		if (t->dst.protonum == IPPROTO_TCP ||
+		    t->dst.protonum == IPPROTO_UDP ||
+		    t->dst.protonum == IPPROTO_UDPLITE ||
+		    t->dst.protonum == IPPROTO_DCCP ||
+		    t->dst.protonum == IPPROTO_SCTP)
+			fl4->fl4_sport = t->src.u.all;
+	}
+}
+#endif /* CONFIG_XFRM */
+
+static bool nf_nat_ipv4_in_range(const struct nf_conntrack_tuple *t,
+				 const struct nf_nat_range *range)
+{
+	return ntohl(t->src.u3.ip) >= ntohl(range->min_addr.ip) &&
+	       ntohl(t->src.u3.ip) <= ntohl(range->max_addr.ip);
+}
+
+static u32 nf_nat_ipv4_secure_port(const struct nf_conntrack_tuple *t,
+				   __be16 dport)
+{
+	return secure_ipv4_port_ephemeral(t->src.u3.ip, t->dst.u3.ip, dport);
+}
+
+static bool nf_nat_ipv4_manip_pkt(struct sk_buff *skb,
+				  unsigned int iphdroff,
+				  const struct nf_nat_l4proto *l4proto,
+				  const struct nf_conntrack_tuple *target,
+				  enum nf_nat_manip_type maniptype)
+{
+	struct iphdr *iph;
+	unsigned int hdroff;
+
+	if (!skb_make_writable(skb, iphdroff + sizeof(*iph)))
+		return false;
+
+	iph = (void *)skb->data + iphdroff;
+	hdroff = iphdroff + iph->ihl * 4;
+
+	if (!l4proto->manip_pkt(skb, &nf_nat_l3proto_ipv4, iphdroff, hdroff,
+				target, maniptype))
+		return false;
+	iph = (void *)skb->data + iphdroff;
+
+	if (maniptype == NF_NAT_MANIP_SRC) {
+		csum_replace4(&iph->check, iph->saddr, target->src.u3.ip);
+		iph->saddr = target->src.u3.ip;
+	} else {
+		csum_replace4(&iph->check, iph->daddr, target->dst.u3.ip);
+		iph->daddr = target->dst.u3.ip;
+	}
+	return true;
+}
+
+static void nf_nat_ipv4_csum_update(struct sk_buff *skb,
+				    unsigned int iphdroff, __sum16 *check,
+				    const struct nf_conntrack_tuple *t,
+				    enum nf_nat_manip_type maniptype)
+{
+	struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff);
+	__be32 oldip, newip;
+
+	if (maniptype == NF_NAT_MANIP_SRC) {
+		oldip = iph->saddr;
+		newip = t->src.u3.ip;
+	} else {
+		oldip = iph->daddr;
+		newip = t->dst.u3.ip;
+	}
+	inet_proto_csum_replace4(check, skb, oldip, newip, 1);
+}
+
+static void nf_nat_ipv4_csum_recalc(struct sk_buff *skb,
+				    u8 proto, void *data, __sum16 *check,
+				    int datalen, int oldlen)
+{
+	const struct iphdr *iph = ip_hdr(skb);
+	struct rtable *rt = skb_rtable(skb);
+
+	if (skb->ip_summed != CHECKSUM_PARTIAL) {
+		if (!(rt->rt_flags & RTCF_LOCAL) &&
+		    (!skb->dev || skb->dev->features & NETIF_F_V4_CSUM)) {
+			skb->ip_summed = CHECKSUM_PARTIAL;
+			skb->csum_start = skb_headroom(skb) +
+					  skb_network_offset(skb) +
+					  ip_hdrlen(skb);
+			skb->csum_offset = (void *)check - data;
+			*check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
+						    datalen, proto, 0);
+		} else {
+			*check = 0;
+			*check = csum_tcpudp_magic(iph->saddr, iph->daddr,
+						   datalen, proto,
+						   csum_partial(data, datalen,
+								0));
+			if (proto == IPPROTO_UDP && !*check)
+				*check = CSUM_MANGLED_0;
+		}
+	} else
+		inet_proto_csum_replace2(check, skb,
+					 htons(oldlen), htons(datalen), 1);
+}
+
+static int nf_nat_ipv4_nlattr_to_range(struct nlattr *tb[],
+				       struct nf_nat_range *range)
+{
+	if (tb[CTA_NAT_V4_MINIP]) {
+		range->min_addr.ip = nla_get_be32(tb[CTA_NAT_V4_MINIP]);
+		range->flags |= NF_NAT_RANGE_MAP_IPS;
+	}
+
+	if (tb[CTA_NAT_V4_MAXIP])
+		range->max_addr.ip = nla_get_be32(tb[CTA_NAT_V4_MAXIP]);
+	else
+		range->max_addr.ip = range->min_addr.ip;
+
+	return 0;
+}
+
+static const struct nf_nat_l3proto nf_nat_l3proto_ipv4 = {
+	.l3proto		= NFPROTO_IPV4,
+	.in_range		= nf_nat_ipv4_in_range,
+	.secure_port		= nf_nat_ipv4_secure_port,
+	.manip_pkt		= nf_nat_ipv4_manip_pkt,
+	.csum_update		= nf_nat_ipv4_csum_update,
+	.csum_recalc		= nf_nat_ipv4_csum_recalc,
+	.nlattr_to_range	= nf_nat_ipv4_nlattr_to_range,
+#ifdef CONFIG_XFRM
+	.decode_session		= nf_nat_ipv4_decode_session,
+#endif
+};
+
+int nf_nat_icmp_reply_translation(struct sk_buff *skb,
+				  struct nf_conn *ct,
+				  enum ip_conntrack_info ctinfo,
+				  unsigned int hooknum)
+{
+	struct {
+		struct icmphdr	icmp;
+		struct iphdr	ip;
+	} *inside;
+	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+	enum nf_nat_manip_type manip = HOOK2MANIP(hooknum);
+	unsigned int hdrlen = ip_hdrlen(skb);
+	const struct nf_nat_l4proto *l4proto;
+	struct nf_conntrack_tuple target;
+	unsigned long statusbit;
+
+	NF_CT_ASSERT(ctinfo == IP_CT_RELATED || ctinfo == IP_CT_RELATED_REPLY);
+
+	if (!skb_make_writable(skb, hdrlen + sizeof(*inside)))
+		return 0;
+	if (nf_ip_checksum(skb, hooknum, hdrlen, 0))
+		return 0;
+
+	inside = (void *)skb->data + hdrlen;
+	if (inside->icmp.type == ICMP_REDIRECT) {
+		if ((ct->status & IPS_NAT_DONE_MASK) != IPS_NAT_DONE_MASK)
+			return 0;
+		if (ct->status & IPS_NAT_MASK)
+			return 0;
+	}
+
+	if (manip == NF_NAT_MANIP_SRC)
+		statusbit = IPS_SRC_NAT;
+	else
+		statusbit = IPS_DST_NAT;
+
+	/* Invert if this is reply direction */
+	if (dir == IP_CT_DIR_REPLY)
+		statusbit ^= IPS_NAT_MASK;
+
+	if (!(ct->status & statusbit))
+		return 1;
+
+	l4proto = __nf_nat_l4proto_find(NFPROTO_IPV4, inside->ip.protocol);
+	if (!nf_nat_ipv4_manip_pkt(skb, hdrlen + sizeof(inside->icmp),
+				   l4proto, &ct->tuplehash[!dir].tuple, !manip))
+		return 0;
+
+	if (skb->ip_summed != CHECKSUM_PARTIAL) {
+		/* Reloading "inside" here since manip_pkt may reallocate */
+		inside = (void *)skb->data + hdrlen;
+		inside->icmp.checksum = 0;
+		inside->icmp.checksum =
+			csum_fold(skb_checksum(skb, hdrlen,
+					       skb->len - hdrlen, 0));
+	}
+
+	/* Change outer to look like the reply to an incoming packet */
+	nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple);
+	l4proto = __nf_nat_l4proto_find(NFPROTO_IPV4, 0);
+	if (!nf_nat_ipv4_manip_pkt(skb, 0, l4proto, &target, manip))
+		return 0;
+
+	return 1;
+}
+EXPORT_SYMBOL_GPL(nf_nat_icmp_reply_translation);
+
+static int __init nf_nat_l3proto_ipv4_init(void)
+{
+	int err;
+
+	err = nf_nat_l4proto_register(NFPROTO_IPV4, &nf_nat_l4proto_icmp);
+	if (err < 0)
+		goto err1;
+	err = nf_nat_l3proto_register(&nf_nat_l3proto_ipv4);
+	if (err < 0)
+		goto err2;
+	return err;
+
+err2:
+	nf_nat_l4proto_unregister(NFPROTO_IPV4, &nf_nat_l4proto_icmp);
+err1:
+	return err;
+}
+
+static void __exit nf_nat_l3proto_ipv4_exit(void)
+{
+	nf_nat_l3proto_unregister(&nf_nat_l3proto_ipv4);
+	nf_nat_l4proto_unregister(NFPROTO_IPV4, &nf_nat_l4proto_icmp);
+}
+
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("nf-nat-" __stringify(AF_INET));
+
+module_init(nf_nat_l3proto_ipv4_init);
+module_exit(nf_nat_l3proto_ipv4_exit);
diff --git a/net/ipv4/netfilter/nf_nat_pptp.c b/net/ipv4/netfilter/nf_nat_pptp.c
index 31ef890d894b..a06d7d74817d 100644
--- a/net/ipv4/netfilter/nf_nat_pptp.c
+++ b/net/ipv4/netfilter/nf_nat_pptp.c
@@ -22,7 +22,6 @@
 
 #include <net/netfilter/nf_nat.h>
 #include <net/netfilter/nf_nat_helper.h>
-#include <net/netfilter/nf_nat_rule.h>
 #include <net/netfilter/nf_conntrack_helper.h>
 #include <net/netfilter/nf_conntrack_expect.h>
 #include <net/netfilter/nf_conntrack_zones.h>
@@ -47,7 +46,7 @@ static void pptp_nat_expected(struct nf_conn *ct,
 	struct nf_conntrack_tuple t;
 	const struct nf_ct_pptp_master *ct_pptp_info;
 	const struct nf_nat_pptp *nat_pptp_info;
-	struct nf_nat_ipv4_range range;
+	struct nf_nat_range range;
 
 	ct_pptp_info = nfct_help_data(master);
 	nat_pptp_info = &nfct_nat(master)->help.nat_pptp_info;
@@ -89,21 +88,21 @@ static void pptp_nat_expected(struct nf_conn *ct,
 
 	/* Change src to where master sends to */
 	range.flags = NF_NAT_RANGE_MAP_IPS;
-	range.min_ip = range.max_ip
-		= ct->master->tuplehash[!exp->dir].tuple.dst.u3.ip;
+	range.min_addr = range.max_addr
+		= ct->master->tuplehash[!exp->dir].tuple.dst.u3;
 	if (exp->dir == IP_CT_DIR_ORIGINAL) {
 		range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
-		range.min = range.max = exp->saved_proto;
+		range.min_proto = range.max_proto = exp->saved_proto;
 	}
 	nf_nat_setup_info(ct, &range, NF_NAT_MANIP_SRC);
 
 	/* For DST manip, map port here to where it's expected. */
 	range.flags = NF_NAT_RANGE_MAP_IPS;
-	range.min_ip = range.max_ip
-		= ct->master->tuplehash[!exp->dir].tuple.src.u3.ip;
+	range.min_addr = range.max_addr
+		= ct->master->tuplehash[!exp->dir].tuple.src.u3;
 	if (exp->dir == IP_CT_DIR_REPLY) {
 		range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
-		range.min = range.max = exp->saved_proto;
+		range.min_proto = range.max_proto = exp->saved_proto;
 	}
 	nf_nat_setup_info(ct, &range, NF_NAT_MANIP_DST);
 }
diff --git a/net/ipv4/netfilter/nf_nat_proto_common.c b/net/ipv4/netfilter/nf_nat_proto_common.c
deleted file mode 100644
index 9993bc93e102..000000000000
--- a/net/ipv4/netfilter/nf_nat_proto_common.c
+++ /dev/null
@@ -1,114 +0,0 @@
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
- * (C) 2008 Patrick McHardy <kaber@trash.net>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/types.h>
-#include <linux/random.h>
-#include <linux/ip.h>
-
-#include <linux/netfilter.h>
-#include <linux/export.h>
-#include <net/secure_seq.h>
-#include <net/netfilter/nf_nat.h>
-#include <net/netfilter/nf_nat_core.h>
-#include <net/netfilter/nf_nat_rule.h>
-#include <net/netfilter/nf_nat_protocol.h>
-
-bool nf_nat_proto_in_range(const struct nf_conntrack_tuple *tuple,
-			   enum nf_nat_manip_type maniptype,
-			   const union nf_conntrack_man_proto *min,
-			   const union nf_conntrack_man_proto *max)
-{
-	__be16 port;
-
-	if (maniptype == NF_NAT_MANIP_SRC)
-		port = tuple->src.u.all;
-	else
-		port = tuple->dst.u.all;
-
-	return ntohs(port) >= ntohs(min->all) &&
-	       ntohs(port) <= ntohs(max->all);
-}
-EXPORT_SYMBOL_GPL(nf_nat_proto_in_range);
-
-void nf_nat_proto_unique_tuple(struct nf_conntrack_tuple *tuple,
-			       const struct nf_nat_ipv4_range *range,
-			       enum nf_nat_manip_type maniptype,
-			       const struct nf_conn *ct,
-			       u_int16_t *rover)
-{
-	unsigned int range_size, min, i;
-	__be16 *portptr;
-	u_int16_t off;
-
-	if (maniptype == NF_NAT_MANIP_SRC)
-		portptr = &tuple->src.u.all;
-	else
-		portptr = &tuple->dst.u.all;
-
-	/* If no range specified... */
-	if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED)) {
-		/* If it's dst rewrite, can't change port */
-		if (maniptype == NF_NAT_MANIP_DST)
-			return;
-
-		if (ntohs(*portptr) < 1024) {
-			/* Loose convention: >> 512 is credential passing */
-			if (ntohs(*portptr) < 512) {
-				min = 1;
-				range_size = 511 - min + 1;
-			} else {
-				min = 600;
-				range_size = 1023 - min + 1;
-			}
-		} else {
-			min = 1024;
-			range_size = 65535 - 1024 + 1;
-		}
-	} else {
-		min = ntohs(range->min.all);
-		range_size = ntohs(range->max.all) - min + 1;
-	}
-
-	if (range->flags & NF_NAT_RANGE_PROTO_RANDOM)
-		off = secure_ipv4_port_ephemeral(tuple->src.u3.ip, tuple->dst.u3.ip,
-						 maniptype == NF_NAT_MANIP_SRC
-						 ? tuple->dst.u.all
-						 : tuple->src.u.all);
-	else
-		off = *rover;
-
-	for (i = 0; ; ++off) {
-		*portptr = htons(min + off % range_size);
-		if (++i != range_size && nf_nat_used_tuple(tuple, ct))
-			continue;
-		if (!(range->flags & NF_NAT_RANGE_PROTO_RANDOM))
-			*rover = off;
-		return;
-	}
-	return;
-}
-EXPORT_SYMBOL_GPL(nf_nat_proto_unique_tuple);
-
-#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
-int nf_nat_proto_nlattr_to_range(struct nlattr *tb[],
-				 struct nf_nat_ipv4_range *range)
-{
-	if (tb[CTA_PROTONAT_PORT_MIN]) {
-		range->min.all = nla_get_be16(tb[CTA_PROTONAT_PORT_MIN]);
-		range->max.all = range->min.tcp.port;
-		range->flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
-	}
-	if (tb[CTA_PROTONAT_PORT_MAX]) {
-		range->max.all = nla_get_be16(tb[CTA_PROTONAT_PORT_MAX]);
-		range->flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
-	}
-	return 0;
-}
-EXPORT_SYMBOL_GPL(nf_nat_proto_nlattr_to_range);
-#endif
diff --git a/net/ipv4/netfilter/nf_nat_proto_dccp.c b/net/ipv4/netfilter/nf_nat_proto_dccp.c
deleted file mode 100644
index 3f67138d187c..000000000000
--- a/net/ipv4/netfilter/nf_nat_proto_dccp.c
+++ /dev/null
@@ -1,106 +0,0 @@
-/*
- * DCCP NAT protocol helper
- *
- * Copyright (c) 2005, 2006. 2008 Patrick McHardy <kaber@trash.net>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/skbuff.h>
-#include <linux/ip.h>
-#include <linux/dccp.h>
-
-#include <net/netfilter/nf_conntrack.h>
-#include <net/netfilter/nf_nat.h>
-#include <net/netfilter/nf_nat_protocol.h>
-
-static u_int16_t dccp_port_rover;
-
-static void
-dccp_unique_tuple(struct nf_conntrack_tuple *tuple,
-		  const struct nf_nat_ipv4_range *range,
-		  enum nf_nat_manip_type maniptype,
-		  const struct nf_conn *ct)
-{
-	nf_nat_proto_unique_tuple(tuple, range, maniptype, ct,
-				  &dccp_port_rover);
-}
-
-static bool
-dccp_manip_pkt(struct sk_buff *skb,
-	       unsigned int iphdroff,
-	       const struct nf_conntrack_tuple *tuple,
-	       enum nf_nat_manip_type maniptype)
-{
-	const struct iphdr *iph = (const void *)(skb->data + iphdroff);
-	struct dccp_hdr *hdr;
-	unsigned int hdroff = iphdroff + iph->ihl * 4;
-	__be32 oldip, newip;
-	__be16 *portptr, oldport, newport;
-	int hdrsize = 8; /* DCCP connection tracking guarantees this much */
-
-	if (skb->len >= hdroff + sizeof(struct dccp_hdr))
-		hdrsize = sizeof(struct dccp_hdr);
-
-	if (!skb_make_writable(skb, hdroff + hdrsize))
-		return false;
-
-	iph = (struct iphdr *)(skb->data + iphdroff);
-	hdr = (struct dccp_hdr *)(skb->data + hdroff);
-
-	if (maniptype == NF_NAT_MANIP_SRC) {
-		oldip = iph->saddr;
-		newip = tuple->src.u3.ip;
-		newport = tuple->src.u.dccp.port;
-		portptr = &hdr->dccph_sport;
-	} else {
-		oldip = iph->daddr;
-		newip = tuple->dst.u3.ip;
-		newport = tuple->dst.u.dccp.port;
-		portptr = &hdr->dccph_dport;
-	}
-
-	oldport = *portptr;
-	*portptr = newport;
-
-	if (hdrsize < sizeof(*hdr))
-		return true;
-
-	inet_proto_csum_replace4(&hdr->dccph_checksum, skb, oldip, newip, 1);
-	inet_proto_csum_replace2(&hdr->dccph_checksum, skb, oldport, newport,
-				 0);
-	return true;
-}
-
-static const struct nf_nat_protocol nf_nat_protocol_dccp = {
-	.protonum		= IPPROTO_DCCP,
-	.manip_pkt		= dccp_manip_pkt,
-	.in_range		= nf_nat_proto_in_range,
-	.unique_tuple		= dccp_unique_tuple,
-#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
-	.nlattr_to_range	= nf_nat_proto_nlattr_to_range,
-#endif
-};
-
-static int __init nf_nat_proto_dccp_init(void)
-{
-	return nf_nat_protocol_register(&nf_nat_protocol_dccp);
-}
-
-static void __exit nf_nat_proto_dccp_fini(void)
-{
-	nf_nat_protocol_unregister(&nf_nat_protocol_dccp);
-}
-
-module_init(nf_nat_proto_dccp_init);
-module_exit(nf_nat_proto_dccp_fini);
-
-MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
-MODULE_DESCRIPTION("DCCP NAT protocol helper");
-MODULE_LICENSE("GPL");
diff --git a/net/ipv4/netfilter/nf_nat_proto_gre.c b/net/ipv4/netfilter/nf_nat_proto_gre.c
index 46ba0b9ab985..ea44f02563b5 100644
--- a/net/ipv4/netfilter/nf_nat_proto_gre.c
+++ b/net/ipv4/netfilter/nf_nat_proto_gre.c
@@ -28,8 +28,7 @@
 #include <linux/ip.h>
 
 #include <net/netfilter/nf_nat.h>
-#include <net/netfilter/nf_nat_rule.h>
-#include <net/netfilter/nf_nat_protocol.h>
+#include <net/netfilter/nf_nat_l4proto.h>
 #include <linux/netfilter/nf_conntrack_proto_gre.h>
 
 MODULE_LICENSE("GPL");
@@ -38,8 +37,9 @@ MODULE_DESCRIPTION("Netfilter NAT protocol helper module for GRE");
 
 /* generate unique tuple ... */
 static void
-gre_unique_tuple(struct nf_conntrack_tuple *tuple,
-		 const struct nf_nat_ipv4_range *range,
+gre_unique_tuple(const struct nf_nat_l3proto *l3proto,
+		 struct nf_conntrack_tuple *tuple,
+		 const struct nf_nat_range *range,
 		 enum nf_nat_manip_type maniptype,
 		 const struct nf_conn *ct)
 {
@@ -62,8 +62,8 @@ gre_unique_tuple(struct nf_conntrack_tuple *tuple,
 		min = 1;
 		range_size = 0xffff;
 	} else {
-		min = ntohs(range->min.gre.key);
-		range_size = ntohs(range->max.gre.key) - min + 1;
+		min = ntohs(range->min_proto.gre.key);
+		range_size = ntohs(range->max_proto.gre.key) - min + 1;
 	}
 
 	pr_debug("min = %u, range_size = %u\n", min, range_size);
@@ -80,14 +80,14 @@ gre_unique_tuple(struct nf_conntrack_tuple *tuple,
 
 /* manipulate a GRE packet according to maniptype */
 static bool
-gre_manip_pkt(struct sk_buff *skb, unsigned int iphdroff,
+gre_manip_pkt(struct sk_buff *skb,
+	      const struct nf_nat_l3proto *l3proto,
+	      unsigned int iphdroff, unsigned int hdroff,
 	      const struct nf_conntrack_tuple *tuple,
 	      enum nf_nat_manip_type maniptype)
 {
 	const struct gre_hdr *greh;
 	struct gre_hdr_pptp *pgreh;
-	const struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff);
-	unsigned int hdroff = iphdroff + iph->ihl * 4;
 
 	/* pgreh includes two optional 32bit fields which are not required
 	 * to be there.  That's where the magic '8' comes from */
@@ -117,24 +117,24 @@ gre_manip_pkt(struct sk_buff *skb, unsigned int iphdroff,
 	return true;
 }
 
-static const struct nf_nat_protocol gre = {
-	.protonum		= IPPROTO_GRE,
+static const struct nf_nat_l4proto gre = {
+	.l4proto		= IPPROTO_GRE,
 	.manip_pkt		= gre_manip_pkt,
-	.in_range		= nf_nat_proto_in_range,
+	.in_range		= nf_nat_l4proto_in_range,
 	.unique_tuple		= gre_unique_tuple,
 #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
-	.nlattr_to_range	= nf_nat_proto_nlattr_to_range,
+	.nlattr_to_range	= nf_nat_l4proto_nlattr_to_range,
 #endif
 };
 
 static int __init nf_nat_proto_gre_init(void)
 {
-	return nf_nat_protocol_register(&gre);
+	return nf_nat_l4proto_register(NFPROTO_IPV4, &gre);
 }
 
 static void __exit nf_nat_proto_gre_fini(void)
 {
-	nf_nat_protocol_unregister(&gre);
+	nf_nat_l4proto_unregister(NFPROTO_IPV4, &gre);
 }
 
 module_init(nf_nat_proto_gre_init);
diff --git a/net/ipv4/netfilter/nf_nat_proto_icmp.c b/net/ipv4/netfilter/nf_nat_proto_icmp.c
index b35172851bae..eb303471bcf6 100644
--- a/net/ipv4/netfilter/nf_nat_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_nat_proto_icmp.c
@@ -15,8 +15,7 @@
 #include <linux/netfilter.h>
 #include <net/netfilter/nf_nat.h>
 #include <net/netfilter/nf_nat_core.h>
-#include <net/netfilter/nf_nat_rule.h>
-#include <net/netfilter/nf_nat_protocol.h>
+#include <net/netfilter/nf_nat_l4proto.h>
 
 static bool
 icmp_in_range(const struct nf_conntrack_tuple *tuple,
@@ -29,8 +28,9 @@ icmp_in_range(const struct nf_conntrack_tuple *tuple,
 }
 
 static void
-icmp_unique_tuple(struct nf_conntrack_tuple *tuple,
-		  const struct nf_nat_ipv4_range *range,
+icmp_unique_tuple(const struct nf_nat_l3proto *l3proto,
+		  struct nf_conntrack_tuple *tuple,
+		  const struct nf_nat_range *range,
 		  enum nf_nat_manip_type maniptype,
 		  const struct nf_conn *ct)
 {
@@ -38,13 +38,14 @@ icmp_unique_tuple(struct nf_conntrack_tuple *tuple,
 	unsigned int range_size;
 	unsigned int i;
 
-	range_size = ntohs(range->max.icmp.id) - ntohs(range->min.icmp.id) + 1;
+	range_size = ntohs(range->max_proto.icmp.id) -
+		     ntohs(range->min_proto.icmp.id) + 1;
 	/* If no range specified... */
 	if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED))
 		range_size = 0xFFFF;
 
 	for (i = 0; ; ++id) {
-		tuple->src.u.icmp.id = htons(ntohs(range->min.icmp.id) +
+		tuple->src.u.icmp.id = htons(ntohs(range->min_proto.icmp.id) +
 					     (id % range_size));
 		if (++i == range_size || !nf_nat_used_tuple(tuple, ct))
 			return;
@@ -54,13 +55,12 @@ icmp_unique_tuple(struct nf_conntrack_tuple *tuple,
 
 static bool
 icmp_manip_pkt(struct sk_buff *skb,
-	       unsigned int iphdroff,
+	       const struct nf_nat_l3proto *l3proto,
+	       unsigned int iphdroff, unsigned int hdroff,
 	       const struct nf_conntrack_tuple *tuple,
 	       enum nf_nat_manip_type maniptype)
 {
-	const struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff);
 	struct icmphdr *hdr;
-	unsigned int hdroff = iphdroff + iph->ihl*4;
 
 	if (!skb_make_writable(skb, hdroff + sizeof(*hdr)))
 		return false;
@@ -72,12 +72,12 @@ icmp_manip_pkt(struct sk_buff *skb,
 	return true;
 }
 
-const struct nf_nat_protocol nf_nat_protocol_icmp = {
-	.protonum		= IPPROTO_ICMP,
+const struct nf_nat_l4proto nf_nat_l4proto_icmp = {
+	.l4proto		= IPPROTO_ICMP,
 	.manip_pkt		= icmp_manip_pkt,
 	.in_range		= icmp_in_range,
 	.unique_tuple		= icmp_unique_tuple,
 #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
-	.nlattr_to_range	= nf_nat_proto_nlattr_to_range,
+	.nlattr_to_range	= nf_nat_l4proto_nlattr_to_range,
 #endif
 };
diff --git a/net/ipv4/netfilter/nf_nat_proto_sctp.c b/net/ipv4/netfilter/nf_nat_proto_sctp.c
deleted file mode 100644
index 3cce9b6c1c29..000000000000
--- a/net/ipv4/netfilter/nf_nat_proto_sctp.c
+++ /dev/null
@@ -1,96 +0,0 @@
-/*
- * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/types.h>
-#include <linux/init.h>
-#include <linux/ip.h>
-#include <linux/sctp.h>
-#include <linux/module.h>
-#include <net/sctp/checksum.h>
-
-#include <net/netfilter/nf_nat_protocol.h>
-
-static u_int16_t nf_sctp_port_rover;
-
-static void
-sctp_unique_tuple(struct nf_conntrack_tuple *tuple,
-		  const struct nf_nat_ipv4_range *range,
-		  enum nf_nat_manip_type maniptype,
-		  const struct nf_conn *ct)
-{
-	nf_nat_proto_unique_tuple(tuple, range, maniptype, ct,
-				  &nf_sctp_port_rover);
-}
-
-static bool
-sctp_manip_pkt(struct sk_buff *skb,
-	       unsigned int iphdroff,
-	       const struct nf_conntrack_tuple *tuple,
-	       enum nf_nat_manip_type maniptype)
-{
-	const struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff);
-	struct sk_buff *frag;
-	sctp_sctphdr_t *hdr;
-	unsigned int hdroff = iphdroff + iph->ihl*4;
-	__be32 oldip, newip;
-	__be32 crc32;
-
-	if (!skb_make_writable(skb, hdroff + sizeof(*hdr)))
-		return false;
-
-	iph = (struct iphdr *)(skb->data + iphdroff);
-	hdr = (struct sctphdr *)(skb->data + hdroff);
-
-	if (maniptype == NF_NAT_MANIP_SRC) {
-		/* Get rid of src ip and src pt */
-		oldip = iph->saddr;
-		newip = tuple->src.u3.ip;
-		hdr->source = tuple->src.u.sctp.port;
-	} else {
-		/* Get rid of dst ip and dst pt */
-		oldip = iph->daddr;
-		newip = tuple->dst.u3.ip;
-		hdr->dest = tuple->dst.u.sctp.port;
-	}
-
-	crc32 = sctp_start_cksum((u8 *)hdr, skb_headlen(skb) - hdroff);
-	skb_walk_frags(skb, frag)
-		crc32 = sctp_update_cksum((u8 *)frag->data, skb_headlen(frag),
-					  crc32);
-	crc32 = sctp_end_cksum(crc32);
-	hdr->checksum = crc32;
-
-	return true;
-}
-
-static const struct nf_nat_protocol nf_nat_protocol_sctp = {
-	.protonum		= IPPROTO_SCTP,
-	.manip_pkt		= sctp_manip_pkt,
-	.in_range		= nf_nat_proto_in_range,
-	.unique_tuple		= sctp_unique_tuple,
-#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
-	.nlattr_to_range	= nf_nat_proto_nlattr_to_range,
-#endif
-};
-
-static int __init nf_nat_proto_sctp_init(void)
-{
-	return nf_nat_protocol_register(&nf_nat_protocol_sctp);
-}
-
-static void __exit nf_nat_proto_sctp_exit(void)
-{
-	nf_nat_protocol_unregister(&nf_nat_protocol_sctp);
-}
-
-module_init(nf_nat_proto_sctp_init);
-module_exit(nf_nat_proto_sctp_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("SCTP NAT protocol helper");
-MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
diff --git a/net/ipv4/netfilter/nf_nat_proto_tcp.c b/net/ipv4/netfilter/nf_nat_proto_tcp.c
deleted file mode 100644
index 9fb4b4e72bbf..000000000000
--- a/net/ipv4/netfilter/nf_nat_proto_tcp.c
+++ /dev/null
@@ -1,91 +0,0 @@
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/types.h>
-#include <linux/init.h>
-#include <linux/export.h>
-#include <linux/ip.h>
-#include <linux/tcp.h>
-
-#include <linux/netfilter.h>
-#include <linux/netfilter/nfnetlink_conntrack.h>
-#include <net/netfilter/nf_nat.h>
-#include <net/netfilter/nf_nat_rule.h>
-#include <net/netfilter/nf_nat_protocol.h>
-#include <net/netfilter/nf_nat_core.h>
-
-static u_int16_t tcp_port_rover;
-
-static void
-tcp_unique_tuple(struct nf_conntrack_tuple *tuple,
-		 const struct nf_nat_ipv4_range *range,
-		 enum nf_nat_manip_type maniptype,
-		 const struct nf_conn *ct)
-{
-	nf_nat_proto_unique_tuple(tuple, range, maniptype, ct, &tcp_port_rover);
-}
-
-static bool
-tcp_manip_pkt(struct sk_buff *skb,
-	      unsigned int iphdroff,
-	      const struct nf_conntrack_tuple *tuple,
-	      enum nf_nat_manip_type maniptype)
-{
-	const struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff);
-	struct tcphdr *hdr;
-	unsigned int hdroff = iphdroff + iph->ihl*4;
-	__be32 oldip, newip;
-	__be16 *portptr, newport, oldport;
-	int hdrsize = 8; /* TCP connection tracking guarantees this much */
-
-	/* this could be a inner header returned in icmp packet; in such
-	   cases we cannot update the checksum field since it is outside of
-	   the 8 bytes of transport layer headers we are guaranteed */
-	if (skb->len >= hdroff + sizeof(struct tcphdr))
-		hdrsize = sizeof(struct tcphdr);
-
-	if (!skb_make_writable(skb, hdroff + hdrsize))
-		return false;
-
-	iph = (struct iphdr *)(skb->data + iphdroff);
-	hdr = (struct tcphdr *)(skb->data + hdroff);
-
-	if (maniptype == NF_NAT_MANIP_SRC) {
-		/* Get rid of src ip and src pt */
-		oldip = iph->saddr;
-		newip = tuple->src.u3.ip;
-		newport = tuple->src.u.tcp.port;
-		portptr = &hdr->source;
-	} else {
-		/* Get rid of dst ip and dst pt */
-		oldip = iph->daddr;
-		newip = tuple->dst.u3.ip;
-		newport = tuple->dst.u.tcp.port;
-		portptr = &hdr->dest;
-	}
-
-	oldport = *portptr;
-	*portptr = newport;
-
-	if (hdrsize < sizeof(*hdr))
-		return true;
-
-	inet_proto_csum_replace4(&hdr->check, skb, oldip, newip, 1);
-	inet_proto_csum_replace2(&hdr->check, skb, oldport, newport, 0);
-	return true;
-}
-
-const struct nf_nat_protocol nf_nat_protocol_tcp = {
-	.protonum		= IPPROTO_TCP,
-	.manip_pkt		= tcp_manip_pkt,
-	.in_range		= nf_nat_proto_in_range,
-	.unique_tuple		= tcp_unique_tuple,
-#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
-	.nlattr_to_range	= nf_nat_proto_nlattr_to_range,
-#endif
-};
diff --git a/net/ipv4/netfilter/nf_nat_proto_udp.c b/net/ipv4/netfilter/nf_nat_proto_udp.c
deleted file mode 100644
index 9883336e628f..000000000000
--- a/net/ipv4/netfilter/nf_nat_proto_udp.c
+++ /dev/null
@@ -1,82 +0,0 @@
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/types.h>
-#include <linux/export.h>
-#include <linux/init.h>
-#include <linux/ip.h>
-#include <linux/udp.h>
-
-#include <linux/netfilter.h>
-#include <net/netfilter/nf_nat.h>
-#include <net/netfilter/nf_nat_core.h>
-#include <net/netfilter/nf_nat_rule.h>
-#include <net/netfilter/nf_nat_protocol.h>
-
-static u_int16_t udp_port_rover;
-
-static void
-udp_unique_tuple(struct nf_conntrack_tuple *tuple,
-		 const struct nf_nat_ipv4_range *range,
-		 enum nf_nat_manip_type maniptype,
-		 const struct nf_conn *ct)
-{
-	nf_nat_proto_unique_tuple(tuple, range, maniptype, ct, &udp_port_rover);
-}
-
-static bool
-udp_manip_pkt(struct sk_buff *skb,
-	      unsigned int iphdroff,
-	      const struct nf_conntrack_tuple *tuple,
-	      enum nf_nat_manip_type maniptype)
-{
-	const struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff);
-	struct udphdr *hdr;
-	unsigned int hdroff = iphdroff + iph->ihl*4;
-	__be32 oldip, newip;
-	__be16 *portptr, newport;
-
-	if (!skb_make_writable(skb, hdroff + sizeof(*hdr)))
-		return false;
-
-	iph = (struct iphdr *)(skb->data + iphdroff);
-	hdr = (struct udphdr *)(skb->data + hdroff);
-
-	if (maniptype == NF_NAT_MANIP_SRC) {
-		/* Get rid of src ip and src pt */
-		oldip = iph->saddr;
-		newip = tuple->src.u3.ip;
-		newport = tuple->src.u.udp.port;
-		portptr = &hdr->source;
-	} else {
-		/* Get rid of dst ip and dst pt */
-		oldip = iph->daddr;
-		newip = tuple->dst.u3.ip;
-		newport = tuple->dst.u.udp.port;
-		portptr = &hdr->dest;
-	}
-	if (hdr->check || skb->ip_summed == CHECKSUM_PARTIAL) {
-		inet_proto_csum_replace4(&hdr->check, skb, oldip, newip, 1);
-		inet_proto_csum_replace2(&hdr->check, skb, *portptr, newport,
-					 0);
-		if (!hdr->check)
-			hdr->check = CSUM_MANGLED_0;
-	}
-	*portptr = newport;
-	return true;
-}
-
-const struct nf_nat_protocol nf_nat_protocol_udp = {
-	.protonum		= IPPROTO_UDP,
-	.manip_pkt		= udp_manip_pkt,
-	.in_range		= nf_nat_proto_in_range,
-	.unique_tuple		= udp_unique_tuple,
-#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
-	.nlattr_to_range	= nf_nat_proto_nlattr_to_range,
-#endif
-};
diff --git a/net/ipv4/netfilter/nf_nat_proto_udplite.c b/net/ipv4/netfilter/nf_nat_proto_udplite.c
deleted file mode 100644
index d24d10a7beb2..000000000000
--- a/net/ipv4/netfilter/nf_nat_proto_udplite.c
+++ /dev/null
@@ -1,98 +0,0 @@
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
- * (C) 2008 Patrick McHardy <kaber@trash.net>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/types.h>
-#include <linux/init.h>
-#include <linux/ip.h>
-#include <linux/udp.h>
-
-#include <linux/netfilter.h>
-#include <linux/module.h>
-#include <net/netfilter/nf_nat.h>
-#include <net/netfilter/nf_nat_protocol.h>
-
-static u_int16_t udplite_port_rover;
-
-static void
-udplite_unique_tuple(struct nf_conntrack_tuple *tuple,
-		     const struct nf_nat_ipv4_range *range,
-		     enum nf_nat_manip_type maniptype,
-		     const struct nf_conn *ct)
-{
-	nf_nat_proto_unique_tuple(tuple, range, maniptype, ct,
-				  &udplite_port_rover);
-}
-
-static bool
-udplite_manip_pkt(struct sk_buff *skb,
-		  unsigned int iphdroff,
-		  const struct nf_conntrack_tuple *tuple,
-		  enum nf_nat_manip_type maniptype)
-{
-	const struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff);
-	struct udphdr *hdr;
-	unsigned int hdroff = iphdroff + iph->ihl*4;
-	__be32 oldip, newip;
-	__be16 *portptr, newport;
-
-	if (!skb_make_writable(skb, hdroff + sizeof(*hdr)))
-		return false;
-
-	iph = (struct iphdr *)(skb->data + iphdroff);
-	hdr = (struct udphdr *)(skb->data + hdroff);
-
-	if (maniptype == NF_NAT_MANIP_SRC) {
-		/* Get rid of src ip and src pt */
-		oldip = iph->saddr;
-		newip = tuple->src.u3.ip;
-		newport = tuple->src.u.udp.port;
-		portptr = &hdr->source;
-	} else {
-		/* Get rid of dst ip and dst pt */
-		oldip = iph->daddr;
-		newip = tuple->dst.u3.ip;
-		newport = tuple->dst.u.udp.port;
-		portptr = &hdr->dest;
-	}
-
-	inet_proto_csum_replace4(&hdr->check, skb, oldip, newip, 1);
-	inet_proto_csum_replace2(&hdr->check, skb, *portptr, newport, 0);
-	if (!hdr->check)
-		hdr->check = CSUM_MANGLED_0;
-
-	*portptr = newport;
-	return true;
-}
-
-static const struct nf_nat_protocol nf_nat_protocol_udplite = {
-	.protonum		= IPPROTO_UDPLITE,
-	.manip_pkt		= udplite_manip_pkt,
-	.in_range		= nf_nat_proto_in_range,
-	.unique_tuple		= udplite_unique_tuple,
-#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
-	.nlattr_to_range	= nf_nat_proto_nlattr_to_range,
-#endif
-};
-
-static int __init nf_nat_proto_udplite_init(void)
-{
-	return nf_nat_protocol_register(&nf_nat_protocol_udplite);
-}
-
-static void __exit nf_nat_proto_udplite_fini(void)
-{
-	nf_nat_protocol_unregister(&nf_nat_protocol_udplite);
-}
-
-module_init(nf_nat_proto_udplite_init);
-module_exit(nf_nat_proto_udplite_fini);
-
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("UDP-Lite NAT protocol helper");
-MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
diff --git a/net/ipv4/netfilter/nf_nat_proto_unknown.c b/net/ipv4/netfilter/nf_nat_proto_unknown.c
deleted file mode 100644
index e0afe8112b1c..000000000000
--- a/net/ipv4/netfilter/nf_nat_proto_unknown.c
+++ /dev/null
@@ -1,52 +0,0 @@
-/* The "unknown" protocol.  This is what is used for protocols we
- * don't understand.  It's returned by ip_ct_find_proto().
- */
-
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/types.h>
-#include <linux/init.h>
-
-#include <linux/netfilter.h>
-#include <net/netfilter/nf_nat.h>
-#include <net/netfilter/nf_nat_rule.h>
-#include <net/netfilter/nf_nat_protocol.h>
-
-static bool unknown_in_range(const struct nf_conntrack_tuple *tuple,
-			     enum nf_nat_manip_type manip_type,
-			     const union nf_conntrack_man_proto *min,
-			     const union nf_conntrack_man_proto *max)
-{
-	return true;
-}
-
-static void unknown_unique_tuple(struct nf_conntrack_tuple *tuple,
-				 const struct nf_nat_ipv4_range *range,
-				 enum nf_nat_manip_type maniptype,
-				 const struct nf_conn *ct)
-{
-	/* Sorry: we can't help you; if it's not unique, we can't frob
-	   anything. */
-	return;
-}
-
-static bool
-unknown_manip_pkt(struct sk_buff *skb,
-		  unsigned int iphdroff,
-		  const struct nf_conntrack_tuple *tuple,
-		  enum nf_nat_manip_type maniptype)
-{
-	return true;
-}
-
-const struct nf_nat_protocol nf_nat_unknown_protocol = {
-	.manip_pkt		= unknown_manip_pkt,
-	.in_range		= unknown_in_range,
-	.unique_tuple		= unknown_unique_tuple,
-};
diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c
deleted file mode 100644
index d2a9dc314e0e..000000000000
--- a/net/ipv4/netfilter/nf_nat_rule.c
+++ /dev/null
@@ -1,214 +0,0 @@
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-/* Everything about the rules for NAT. */
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-#include <linux/types.h>
-#include <linux/ip.h>
-#include <linux/netfilter.h>
-#include <linux/netfilter_ipv4.h>
-#include <linux/module.h>
-#include <linux/kmod.h>
-#include <linux/skbuff.h>
-#include <linux/proc_fs.h>
-#include <linux/slab.h>
-#include <net/checksum.h>
-#include <net/route.h>
-#include <linux/bitops.h>
-
-#include <linux/netfilter_ipv4/ip_tables.h>
-#include <net/netfilter/nf_nat.h>
-#include <net/netfilter/nf_nat_core.h>
-#include <net/netfilter/nf_nat_rule.h>
-
-#define NAT_VALID_HOOKS ((1 << NF_INET_PRE_ROUTING) | \
-			 (1 << NF_INET_POST_ROUTING) | \
-			 (1 << NF_INET_LOCAL_OUT) | \
-			 (1 << NF_INET_LOCAL_IN))
-
-static const struct xt_table nat_table = {
-	.name		= "nat",
-	.valid_hooks	= NAT_VALID_HOOKS,
-	.me		= THIS_MODULE,
-	.af		= NFPROTO_IPV4,
-};
-
-/* Source NAT */
-static unsigned int
-ipt_snat_target(struct sk_buff *skb, const struct xt_action_param *par)
-{
-	struct nf_conn *ct;
-	enum ip_conntrack_info ctinfo;
-	const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo;
-
-	NF_CT_ASSERT(par->hooknum == NF_INET_POST_ROUTING ||
-		     par->hooknum == NF_INET_LOCAL_IN);
-
-	ct = nf_ct_get(skb, &ctinfo);
-
-	/* Connection must be valid and new. */
-	NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
-			    ctinfo == IP_CT_RELATED_REPLY));
-	NF_CT_ASSERT(par->out != NULL);
-
-	return nf_nat_setup_info(ct, &mr->range[0], NF_NAT_MANIP_SRC);
-}
-
-static unsigned int
-ipt_dnat_target(struct sk_buff *skb, const struct xt_action_param *par)
-{
-	struct nf_conn *ct;
-	enum ip_conntrack_info ctinfo;
-	const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo;
-
-	NF_CT_ASSERT(par->hooknum == NF_INET_PRE_ROUTING ||
-		     par->hooknum == NF_INET_LOCAL_OUT);
-
-	ct = nf_ct_get(skb, &ctinfo);
-
-	/* Connection must be valid and new. */
-	NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED));
-
-	return nf_nat_setup_info(ct, &mr->range[0], NF_NAT_MANIP_DST);
-}
-
-static int ipt_snat_checkentry(const struct xt_tgchk_param *par)
-{
-	const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo;
-
-	/* Must be a valid range */
-	if (mr->rangesize != 1) {
-		pr_info("SNAT: multiple ranges no longer supported\n");
-		return -EINVAL;
-	}
-	return 0;
-}
-
-static int ipt_dnat_checkentry(const struct xt_tgchk_param *par)
-{
-	const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo;
-
-	/* Must be a valid range */
-	if (mr->rangesize != 1) {
-		pr_info("DNAT: multiple ranges no longer supported\n");
-		return -EINVAL;
-	}
-	return 0;
-}
-
-static unsigned int
-alloc_null_binding(struct nf_conn *ct, unsigned int hooknum)
-{
-	/* Force range to this IP; let proto decide mapping for
-	   per-proto parts (hence not NF_NAT_RANGE_PROTO_SPECIFIED).
-	*/
-	struct nf_nat_ipv4_range range;
-
-	range.flags = 0;
-	pr_debug("Allocating NULL binding for %p (%pI4)\n", ct,
-		 HOOK2MANIP(hooknum) == NF_NAT_MANIP_SRC ?
-		 &ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip :
-		 &ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip);
-
-	return nf_nat_setup_info(ct, &range, HOOK2MANIP(hooknum));
-}
-
-int nf_nat_rule_find(struct sk_buff *skb,
-		     unsigned int hooknum,
-		     const struct net_device *in,
-		     const struct net_device *out,
-		     struct nf_conn *ct)
-{
-	struct net *net = nf_ct_net(ct);
-	int ret;
-
-	ret = ipt_do_table(skb, hooknum, in, out, net->ipv4.nat_table);
-
-	if (ret == NF_ACCEPT) {
-		if (!nf_nat_initialized(ct, HOOK2MANIP(hooknum)))
-			/* NUL mapping */
-			ret = alloc_null_binding(ct, hooknum);
-	}
-	return ret;
-}
-
-static struct xt_target ipt_snat_reg __read_mostly = {
-	.name		= "SNAT",
-	.target		= ipt_snat_target,
-	.targetsize	= sizeof(struct nf_nat_ipv4_multi_range_compat),
-	.table		= "nat",
-	.hooks		= (1 << NF_INET_POST_ROUTING) | (1 << NF_INET_LOCAL_IN),
-	.checkentry	= ipt_snat_checkentry,
-	.family		= AF_INET,
-};
-
-static struct xt_target ipt_dnat_reg __read_mostly = {
-	.name		= "DNAT",
-	.target		= ipt_dnat_target,
-	.targetsize	= sizeof(struct nf_nat_ipv4_multi_range_compat),
-	.table		= "nat",
-	.hooks		= (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT),
-	.checkentry	= ipt_dnat_checkentry,
-	.family		= AF_INET,
-};
-
-static int __net_init nf_nat_rule_net_init(struct net *net)
-{
-	struct ipt_replace *repl;
-
-	repl = ipt_alloc_initial_table(&nat_table);
-	if (repl == NULL)
-		return -ENOMEM;
-	net->ipv4.nat_table = ipt_register_table(net, &nat_table, repl);
-	kfree(repl);
-	if (IS_ERR(net->ipv4.nat_table))
-		return PTR_ERR(net->ipv4.nat_table);
-	return 0;
-}
-
-static void __net_exit nf_nat_rule_net_exit(struct net *net)
-{
-	ipt_unregister_table(net, net->ipv4.nat_table);
-}
-
-static struct pernet_operations nf_nat_rule_net_ops = {
-	.init = nf_nat_rule_net_init,
-	.exit = nf_nat_rule_net_exit,
-};
-
-int __init nf_nat_rule_init(void)
-{
-	int ret;
-
-	ret = register_pernet_subsys(&nf_nat_rule_net_ops);
-	if (ret != 0)
-		goto out;
-	ret = xt_register_target(&ipt_snat_reg);
-	if (ret != 0)
-		goto unregister_table;
-
-	ret = xt_register_target(&ipt_dnat_reg);
-	if (ret != 0)
-		goto unregister_snat;
-
-	return ret;
-
- unregister_snat:
-	xt_unregister_target(&ipt_snat_reg);
- unregister_table:
-	unregister_pernet_subsys(&nf_nat_rule_net_ops);
- out:
-	return ret;
-}
-
-void nf_nat_rule_cleanup(void)
-{
-	xt_unregister_target(&ipt_dnat_reg);
-	xt_unregister_target(&ipt_snat_reg);
-	unregister_pernet_subsys(&nf_nat_rule_net_ops);
-}
diff --git a/net/ipv4/netfilter/nf_nat_sip.c b/net/ipv4/netfilter/nf_nat_sip.c
index df626af8413c..47a47186a791 100644
--- a/net/ipv4/netfilter/nf_nat_sip.c
+++ b/net/ipv4/netfilter/nf_nat_sip.c
@@ -19,7 +19,6 @@
 
 #include <net/netfilter/nf_nat.h>
 #include <net/netfilter/nf_nat_helper.h>
-#include <net/netfilter/nf_nat_rule.h>
 #include <net/netfilter/nf_conntrack_helper.h>
 #include <net/netfilter/nf_conntrack_expect.h>
 #include <linux/netfilter/nf_conntrack_sip.h>
@@ -255,15 +254,15 @@ static void ip_nat_sip_seq_adjust(struct sk_buff *skb, s16 off)
 static void ip_nat_sip_expected(struct nf_conn *ct,
 				struct nf_conntrack_expect *exp)
 {
-	struct nf_nat_ipv4_range range;
+	struct nf_nat_range range;
 
 	/* This must be a fresh one. */
 	BUG_ON(ct->status & IPS_NAT_DONE_MASK);
 
 	/* For DST manip, map port here to where it's expected. */
 	range.flags = (NF_NAT_RANGE_MAP_IPS | NF_NAT_RANGE_PROTO_SPECIFIED);
-	range.min = range.max = exp->saved_proto;
-	range.min_ip = range.max_ip = exp->saved_ip;
+	range.min_proto = range.max_proto = exp->saved_proto;
+	range.min_addr = range.max_addr = exp->saved_addr;
 	nf_nat_setup_info(ct, &range, NF_NAT_MANIP_DST);
 
 	/* Change src to where master sends to, but only if the connection
@@ -271,8 +270,8 @@ static void ip_nat_sip_expected(struct nf_conn *ct,
 	if (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip ==
 	    ct->master->tuplehash[exp->dir].tuple.src.u3.ip) {
 		range.flags = NF_NAT_RANGE_MAP_IPS;
-		range.min_ip = range.max_ip
-			= ct->master->tuplehash[!exp->dir].tuple.dst.u3.ip;
+		range.min_addr = range.max_addr
+			= ct->master->tuplehash[!exp->dir].tuple.dst.u3;
 		nf_nat_setup_info(ct, &range, NF_NAT_MANIP_SRC);
 	}
 }
@@ -307,7 +306,7 @@ static unsigned int ip_nat_sip_expect(struct sk_buff *skb, unsigned int protoff,
 	else
 		port = ntohs(exp->tuple.dst.u.udp.port);
 
-	exp->saved_ip = exp->tuple.dst.u3.ip;
+	exp->saved_addr = exp->tuple.dst.u3;
 	exp->tuple.dst.u3.ip = newip;
 	exp->saved_proto.udp.port = exp->tuple.dst.u.udp.port;
 	exp->dir = !dir;
@@ -329,7 +328,7 @@ static unsigned int ip_nat_sip_expect(struct sk_buff *skb, unsigned int protoff,
 	if (port == 0)
 		return NF_DROP;
 
-	if (exp->tuple.dst.u3.ip != exp->saved_ip ||
+	if (exp->tuple.dst.u3.ip != exp->saved_addr.ip ||
 	    exp->tuple.dst.u.udp.port != exp->saved_proto.udp.port) {
 		buflen = sprintf(buffer, "%pI4:%u", &newip, port);
 		if (!mangle_packet(skb, protoff, dataoff, dptr, datalen,
@@ -485,13 +484,13 @@ static unsigned int ip_nat_sdp_media(struct sk_buff *skb, unsigned int protoff,
 	else
 		rtp_addr->ip = ct->tuplehash[!dir].tuple.dst.u3.ip;
 
-	rtp_exp->saved_ip = rtp_exp->tuple.dst.u3.ip;
+	rtp_exp->saved_addr = rtp_exp->tuple.dst.u3;
 	rtp_exp->tuple.dst.u3.ip = rtp_addr->ip;
 	rtp_exp->saved_proto.udp.port = rtp_exp->tuple.dst.u.udp.port;
 	rtp_exp->dir = !dir;
 	rtp_exp->expectfn = ip_nat_sip_expected;
 
-	rtcp_exp->saved_ip = rtcp_exp->tuple.dst.u3.ip;
+	rtcp_exp->saved_addr = rtcp_exp->tuple.dst.u3;
 	rtcp_exp->tuple.dst.u3.ip = rtp_addr->ip;
 	rtcp_exp->saved_proto.udp.port = rtcp_exp->tuple.dst.u.udp.port;
 	rtcp_exp->dir = !dir;
diff --git a/net/ipv4/netfilter/nf_nat_standalone.c b/net/ipv4/netfilter/nf_nat_standalone.c
deleted file mode 100644
index 3828a4229822..000000000000
--- a/net/ipv4/netfilter/nf_nat_standalone.c
+++ /dev/null
@@ -1,326 +0,0 @@
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-#include <linux/types.h>
-#include <linux/icmp.h>
-#include <linux/gfp.h>
-#include <linux/ip.h>
-#include <linux/netfilter.h>
-#include <linux/netfilter_ipv4.h>
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/proc_fs.h>
-#include <net/ip.h>
-#include <net/checksum.h>
-#include <linux/spinlock.h>
-
-#include <net/netfilter/nf_conntrack.h>
-#include <net/netfilter/nf_conntrack_core.h>
-#include <net/netfilter/nf_conntrack_extend.h>
-#include <net/netfilter/nf_nat.h>
-#include <net/netfilter/nf_nat_rule.h>
-#include <net/netfilter/nf_nat_protocol.h>
-#include <net/netfilter/nf_nat_core.h>
-#include <net/netfilter/nf_nat_helper.h>
-#include <linux/netfilter_ipv4/ip_tables.h>
-
-#ifdef CONFIG_XFRM
-static void nat_decode_session(struct sk_buff *skb, struct flowi *fl)
-{
-	struct flowi4 *fl4 = &fl->u.ip4;
-	const struct nf_conn *ct;
-	const struct nf_conntrack_tuple *t;
-	enum ip_conntrack_info ctinfo;
-	enum ip_conntrack_dir dir;
-	unsigned long statusbit;
-
-	ct = nf_ct_get(skb, &ctinfo);
-	if (ct == NULL)
-		return;
-	dir = CTINFO2DIR(ctinfo);
-	t = &ct->tuplehash[dir].tuple;
-
-	if (dir == IP_CT_DIR_ORIGINAL)
-		statusbit = IPS_DST_NAT;
-	else
-		statusbit = IPS_SRC_NAT;
-
-	if (ct->status & statusbit) {
-		fl4->daddr = t->dst.u3.ip;
-		if (t->dst.protonum == IPPROTO_TCP ||
-		    t->dst.protonum == IPPROTO_UDP ||
-		    t->dst.protonum == IPPROTO_UDPLITE ||
-		    t->dst.protonum == IPPROTO_DCCP ||
-		    t->dst.protonum == IPPROTO_SCTP)
-			fl4->fl4_dport = t->dst.u.tcp.port;
-	}
-
-	statusbit ^= IPS_NAT_MASK;
-
-	if (ct->status & statusbit) {
-		fl4->saddr = t->src.u3.ip;
-		if (t->dst.protonum == IPPROTO_TCP ||
-		    t->dst.protonum == IPPROTO_UDP ||
-		    t->dst.protonum == IPPROTO_UDPLITE ||
-		    t->dst.protonum == IPPROTO_DCCP ||
-		    t->dst.protonum == IPPROTO_SCTP)
-			fl4->fl4_sport = t->src.u.tcp.port;
-	}
-}
-#endif
-
-static unsigned int
-nf_nat_fn(unsigned int hooknum,
-	  struct sk_buff *skb,
-	  const struct net_device *in,
-	  const struct net_device *out,
-	  int (*okfn)(struct sk_buff *))
-{
-	struct nf_conn *ct;
-	enum ip_conntrack_info ctinfo;
-	struct nf_conn_nat *nat;
-	/* maniptype == SRC for postrouting. */
-	enum nf_nat_manip_type maniptype = HOOK2MANIP(hooknum);
-
-	/* We never see fragments: conntrack defrags on pre-routing
-	   and local-out, and nf_nat_out protects post-routing. */
-	NF_CT_ASSERT(!ip_is_fragment(ip_hdr(skb)));
-
-	ct = nf_ct_get(skb, &ctinfo);
-	/* Can't track?  It's not due to stress, or conntrack would
-	   have dropped it.  Hence it's the user's responsibilty to
-	   packet filter it out, or implement conntrack/NAT for that
-	   protocol. 8) --RR */
-	if (!ct)
-		return NF_ACCEPT;
-
-	/* Don't try to NAT if this packet is not conntracked */
-	if (nf_ct_is_untracked(ct))
-		return NF_ACCEPT;
-
-	nat = nfct_nat(ct);
-	if (!nat) {
-		/* NAT module was loaded late. */
-		if (nf_ct_is_confirmed(ct))
-			return NF_ACCEPT;
-		nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC);
-		if (nat == NULL) {
-			pr_debug("failed to add NAT extension\n");
-			return NF_ACCEPT;
-		}
-	}
-
-	switch (ctinfo) {
-	case IP_CT_RELATED:
-	case IP_CT_RELATED_REPLY:
-		if (ip_hdr(skb)->protocol == IPPROTO_ICMP) {
-			if (!nf_nat_icmp_reply_translation(ct, ctinfo,
-							   hooknum, skb))
-				return NF_DROP;
-			else
-				return NF_ACCEPT;
-		}
-		/* Fall thru... (Only ICMPs can be IP_CT_IS_REPLY) */
-	case IP_CT_NEW:
-
-		/* Seen it before?  This can happen for loopback, retrans,
-		   or local packets.. */
-		if (!nf_nat_initialized(ct, maniptype)) {
-			unsigned int ret;
-
-			ret = nf_nat_rule_find(skb, hooknum, in, out, ct);
-			if (ret != NF_ACCEPT)
-				return ret;
-		} else
-			pr_debug("Already setup manip %s for ct %p\n",
-				 maniptype == NF_NAT_MANIP_SRC ? "SRC" : "DST",
-				 ct);
-		break;
-
-	default:
-		/* ESTABLISHED */
-		NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED ||
-			     ctinfo == IP_CT_ESTABLISHED_REPLY);
-	}
-
-	return nf_nat_packet(ct, ctinfo, hooknum, skb);
-}
-
-static unsigned int
-nf_nat_in(unsigned int hooknum,
-	  struct sk_buff *skb,
-	  const struct net_device *in,
-	  const struct net_device *out,
-	  int (*okfn)(struct sk_buff *))
-{
-	unsigned int ret;
-	__be32 daddr = ip_hdr(skb)->daddr;
-
-	ret = nf_nat_fn(hooknum, skb, in, out, okfn);
-	if (ret != NF_DROP && ret != NF_STOLEN &&
-	    daddr != ip_hdr(skb)->daddr)
-		skb_dst_drop(skb);
-
-	return ret;
-}
-
-static unsigned int
-nf_nat_out(unsigned int hooknum,
-	   struct sk_buff *skb,
-	   const struct net_device *in,
-	   const struct net_device *out,
-	   int (*okfn)(struct sk_buff *))
-{
-#ifdef CONFIG_XFRM
-	const struct nf_conn *ct;
-	enum ip_conntrack_info ctinfo;
-#endif
-	unsigned int ret;
-
-	/* root is playing with raw sockets. */
-	if (skb->len < sizeof(struct iphdr) ||
-	    ip_hdrlen(skb) < sizeof(struct iphdr))
-		return NF_ACCEPT;
-
-	ret = nf_nat_fn(hooknum, skb, in, out, okfn);
-#ifdef CONFIG_XFRM
-	if (ret != NF_DROP && ret != NF_STOLEN &&
-	    (ct = nf_ct_get(skb, &ctinfo)) != NULL) {
-		enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
-
-		if ((ct->tuplehash[dir].tuple.src.u3.ip !=
-		     ct->tuplehash[!dir].tuple.dst.u3.ip) ||
-		    (ct->tuplehash[dir].tuple.src.u.all !=
-		     ct->tuplehash[!dir].tuple.dst.u.all)
-		   )
-			return ip_xfrm_me_harder(skb) == 0 ? ret : NF_DROP;
-	}
-#endif
-	return ret;
-}
-
-static unsigned int
-nf_nat_local_fn(unsigned int hooknum,
-		struct sk_buff *skb,
-		const struct net_device *in,
-		const struct net_device *out,
-		int (*okfn)(struct sk_buff *))
-{
-	const struct nf_conn *ct;
-	enum ip_conntrack_info ctinfo;
-	unsigned int ret;
-
-	/* root is playing with raw sockets. */
-	if (skb->len < sizeof(struct iphdr) ||
-	    ip_hdrlen(skb) < sizeof(struct iphdr))
-		return NF_ACCEPT;
-
-	ret = nf_nat_fn(hooknum, skb, in, out, okfn);
-	if (ret != NF_DROP && ret != NF_STOLEN &&
-	    (ct = nf_ct_get(skb, &ctinfo)) != NULL) {
-		enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
-
-		if (ct->tuplehash[dir].tuple.dst.u3.ip !=
-		    ct->tuplehash[!dir].tuple.src.u3.ip) {
-			if (ip_route_me_harder(skb, RTN_UNSPEC))
-				ret = NF_DROP;
-		}
-#ifdef CONFIG_XFRM
-		else if (ct->tuplehash[dir].tuple.dst.u.all !=
-			 ct->tuplehash[!dir].tuple.src.u.all)
-			if (ip_xfrm_me_harder(skb))
-				ret = NF_DROP;
-#endif
-	}
-	return ret;
-}
-
-/* We must be after connection tracking and before packet filtering. */
-
-static struct nf_hook_ops nf_nat_ops[] __read_mostly = {
-	/* Before packet filtering, change destination */
-	{
-		.hook		= nf_nat_in,
-		.owner		= THIS_MODULE,
-		.pf		= NFPROTO_IPV4,
-		.hooknum	= NF_INET_PRE_ROUTING,
-		.priority	= NF_IP_PRI_NAT_DST,
-	},
-	/* After packet filtering, change source */
-	{
-		.hook		= nf_nat_out,
-		.owner		= THIS_MODULE,
-		.pf		= NFPROTO_IPV4,
-		.hooknum	= NF_INET_POST_ROUTING,
-		.priority	= NF_IP_PRI_NAT_SRC,
-	},
-	/* Before packet filtering, change destination */
-	{
-		.hook		= nf_nat_local_fn,
-		.owner		= THIS_MODULE,
-		.pf		= NFPROTO_IPV4,
-		.hooknum	= NF_INET_LOCAL_OUT,
-		.priority	= NF_IP_PRI_NAT_DST,
-	},
-	/* After packet filtering, change source */
-	{
-		.hook		= nf_nat_fn,
-		.owner		= THIS_MODULE,
-		.pf		= NFPROTO_IPV4,
-		.hooknum	= NF_INET_LOCAL_IN,
-		.priority	= NF_IP_PRI_NAT_SRC,
-	},
-};
-
-static int __init nf_nat_standalone_init(void)
-{
-	int ret = 0;
-
-	need_ipv4_conntrack();
-
-#ifdef CONFIG_XFRM
-	BUG_ON(ip_nat_decode_session != NULL);
-	RCU_INIT_POINTER(ip_nat_decode_session, nat_decode_session);
-#endif
-	ret = nf_nat_rule_init();
-	if (ret < 0) {
-		pr_err("nf_nat_init: can't setup rules.\n");
-		goto cleanup_decode_session;
-	}
-	ret = nf_register_hooks(nf_nat_ops, ARRAY_SIZE(nf_nat_ops));
-	if (ret < 0) {
-		pr_err("nf_nat_init: can't register hooks.\n");
-		goto cleanup_rule_init;
-	}
-	return ret;
-
- cleanup_rule_init:
-	nf_nat_rule_cleanup();
- cleanup_decode_session:
-#ifdef CONFIG_XFRM
-	RCU_INIT_POINTER(ip_nat_decode_session, NULL);
-	synchronize_net();
-#endif
-	return ret;
-}
-
-static void __exit nf_nat_standalone_fini(void)
-{
-	nf_unregister_hooks(nf_nat_ops, ARRAY_SIZE(nf_nat_ops));
-	nf_nat_rule_cleanup();
-#ifdef CONFIG_XFRM
-	RCU_INIT_POINTER(ip_nat_decode_session, NULL);
-	synchronize_net();
-#endif
-	/* Conntrack caches are unregistered in nf_conntrack_cleanup */
-}
-
-module_init(nf_nat_standalone_init);
-module_exit(nf_nat_standalone_fini);
-
-MODULE_LICENSE("GPL");
-MODULE_ALIAS("ip_nat");
diff --git a/net/ipv4/netfilter/nf_nat_tftp.c b/net/ipv4/netfilter/nf_nat_tftp.c
index 9dbb8d284f99..ccabbda71a3e 100644
--- a/net/ipv4/netfilter/nf_nat_tftp.c
+++ b/net/ipv4/netfilter/nf_nat_tftp.c
@@ -11,7 +11,6 @@
 #include <net/netfilter/nf_conntrack_helper.h>
 #include <net/netfilter/nf_conntrack_expect.h>
 #include <net/netfilter/nf_nat_helper.h>
-#include <net/netfilter/nf_nat_rule.h>
 #include <linux/netfilter/nf_conntrack_tftp.h>
 
 MODULE_AUTHOR("Magnus Boden <mb@ozaba.mine.nu>");
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index c19b214ffd57..91adddae20a4 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -356,6 +356,30 @@ config NETFILTER_NETLINK_QUEUE_CT
 	  If this option is enabled, NFQUEUE can include Connection Tracking
 	  information together with the packet is the enqueued via NFNETLINK.
 
+config NF_NAT
+	tristate
+
+config NF_NAT_NEEDED
+	bool
+	depends on NF_NAT
+	default y
+
+config NF_NAT_PROTO_DCCP
+	tristate
+	depends on NF_NAT && NF_CT_PROTO_DCCP
+	default NF_NAT && NF_CT_PROTO_DCCP
+
+config NF_NAT_PROTO_UDPLITE
+	tristate
+	depends on NF_NAT && NF_CT_PROTO_UDPLITE
+	default NF_NAT && NF_CT_PROTO_UDPLITE
+
+config NF_NAT_PROTO_SCTP
+	tristate
+	default NF_NAT && NF_CT_PROTO_SCTP
+	depends on NF_NAT && NF_CT_PROTO_SCTP
+	select LIBCRC32C
+
 endif # NF_CONNTRACK
 
 # transparent proxy support
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 1c5160f2278e..09c9451bc510 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -43,6 +43,17 @@ obj-$(CONFIG_NF_CONNTRACK_SANE) += nf_conntrack_sane.o
 obj-$(CONFIG_NF_CONNTRACK_SIP) += nf_conntrack_sip.o
 obj-$(CONFIG_NF_CONNTRACK_TFTP) += nf_conntrack_tftp.o
 
+nf_nat-y	:= nf_nat_core.o nf_nat_proto_unknown.o nf_nat_proto_common.o \
+		   nf_nat_proto_udp.o nf_nat_proto_tcp.o nf_nat_helper.o
+
+obj-$(CONFIG_NF_NAT) += nf_nat.o
+obj-$(CONFIG_NF_NAT) += xt_nat.o
+
+# NAT protocols (nf_nat)
+obj-$(CONFIG_NF_NAT_PROTO_DCCP) += nf_nat_proto_dccp.o
+obj-$(CONFIG_NF_NAT_PROTO_UDPLITE) += nf_nat_proto_udplite.o
+obj-$(CONFIG_NF_NAT_PROTO_SCTP) += nf_nat_proto_sctp.o
+
 # transparent proxy support
 obj-$(CONFIG_NETFILTER_TPROXY) += nf_tproxy_core.o
 
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 8f4b0b2b6f80..e61b3ac9591b 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -275,6 +275,11 @@ EXPORT_SYMBOL_GPL(nfq_ct_nat_hook);
 
 #endif /* CONFIG_NF_CONNTRACK */
 
+#ifdef CONFIG_NF_NAT_NEEDED
+void (*nf_nat_decode_session_hook)(struct sk_buff *, struct flowi *);
+EXPORT_SYMBOL(nf_nat_decode_session_hook);
+#endif
+
 #ifdef CONFIG_PROC_FS
 struct proc_dir_entry *proc_net_netfilter;
 EXPORT_SYMBOL(proc_net_netfilter);
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index cf4875565d67..f83e79defed9 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -55,6 +55,12 @@ int (*nfnetlink_parse_nat_setup_hook)(struct nf_conn *ct,
 				      const struct nlattr *attr) __read_mostly;
 EXPORT_SYMBOL_GPL(nfnetlink_parse_nat_setup_hook);
 
+int (*nf_nat_seq_adjust_hook)(struct sk_buff *skb,
+			      struct nf_conn *ct,
+			      enum ip_conntrack_info ctinfo,
+			      unsigned int protoff);
+EXPORT_SYMBOL_GPL(nf_nat_seq_adjust_hook);
+
 DEFINE_SPINLOCK(nf_conntrack_lock);
 EXPORT_SYMBOL_GPL(nf_conntrack_lock);
 
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index da4fc37a8578..966f5133a384 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -45,7 +45,7 @@
 #include <net/netfilter/nf_conntrack_timestamp.h>
 #ifdef CONFIG_NF_NAT_NEEDED
 #include <net/netfilter/nf_nat_core.h>
-#include <net/netfilter/nf_nat_protocol.h>
+#include <net/netfilter/nf_nat_l4proto.h>
 #include <net/netfilter/nf_nat_helper.h>
 #endif
 
@@ -1096,13 +1096,14 @@ ctnetlink_parse_nat_setup(struct nf_conn *ct,
 			  const struct nlattr *attr)
 {
 	typeof(nfnetlink_parse_nat_setup_hook) parse_nat_setup;
+	int err;
 
 	parse_nat_setup = rcu_dereference(nfnetlink_parse_nat_setup_hook);
 	if (!parse_nat_setup) {
 #ifdef CONFIG_MODULES
 		rcu_read_unlock();
 		nfnl_unlock();
-		if (request_module("nf-nat-ipv4") < 0) {
+		if (request_module("nf-nat") < 0) {
 			nfnl_lock();
 			rcu_read_lock();
 			return -EOPNOTSUPP;
@@ -1115,7 +1116,26 @@ ctnetlink_parse_nat_setup(struct nf_conn *ct,
 		return -EOPNOTSUPP;
 	}
 
-	return parse_nat_setup(ct, manip, attr);
+	err = parse_nat_setup(ct, manip, attr);
+	if (err == -EAGAIN) {
+#ifdef CONFIG_MODULES
+		rcu_read_unlock();
+		spin_unlock_bh(&nf_conntrack_lock);
+		nfnl_unlock();
+		if (request_module("nf-nat-%u", nf_ct_l3num(ct)) < 0) {
+			nfnl_lock();
+			spin_lock_bh(&nf_conntrack_lock);
+			rcu_read_lock();
+			return -EOPNOTSUPP;
+		}
+		nfnl_lock();
+		spin_lock_bh(&nf_conntrack_lock);
+		rcu_read_lock();
+#else
+		err = -EOPNOTSUPP;
+#endif
+	}
+	return err;
 }
 #endif
 
@@ -1979,6 +1999,8 @@ nla_put_failure:
 	return -1;
 }
 
+static const union nf_inet_addr any_addr;
+
 static int
 ctnetlink_exp_dump_expect(struct sk_buff *skb,
 			  const struct nf_conntrack_expect *exp)
@@ -2005,7 +2027,8 @@ ctnetlink_exp_dump_expect(struct sk_buff *skb,
 		goto nla_put_failure;
 
 #ifdef CONFIG_NF_NAT_NEEDED
-	if (exp->saved_ip || exp->saved_proto.all) {
+	if (!nf_inet_addr_cmp(&exp->saved_addr, &any_addr) ||
+	    exp->saved_proto.all) {
 		nest_parms = nla_nest_start(skb, CTA_EXPECT_NAT | NLA_F_NESTED);
 		if (!nest_parms)
 			goto nla_put_failure;
@@ -2014,7 +2037,7 @@ ctnetlink_exp_dump_expect(struct sk_buff *skb,
 			goto nla_put_failure;
 
 		nat_tuple.src.l3num = nf_ct_l3num(master);
-		nat_tuple.src.u3.ip = exp->saved_ip;
+		nat_tuple.src.u3 = exp->saved_addr;
 		nat_tuple.dst.protonum = nf_ct_protonum(master);
 		nat_tuple.src.u = exp->saved_proto;
 
@@ -2410,7 +2433,7 @@ ctnetlink_parse_expect_nat(const struct nlattr *attr,
 	if (err < 0)
 		return err;
 
-	exp->saved_ip = nat_tuple.src.u3.ip;
+	exp->saved_addr = nat_tuple.src.u3;
 	exp->saved_proto = nat_tuple.src.u;
 	exp->dir = ntohl(nla_get_be32(tb[CTA_EXPECT_NAT_DIR]));
 
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index a5ac11ebef33..9c2cc716f4a5 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -505,10 +505,10 @@ static inline s16 nat_offset(const struct nf_conn *ct,
 
 	return get_offset != NULL ? get_offset(ct, dir, seq) : 0;
 }
-#define NAT_OFFSET(pf, ct, dir, seq) \
-	(pf == NFPROTO_IPV4 ? nat_offset(ct, dir, seq) : 0)
+#define NAT_OFFSET(ct, dir, seq) \
+	(nat_offset(ct, dir, seq))
 #else
-#define NAT_OFFSET(pf, ct, dir, seq)	0
+#define NAT_OFFSET(ct, dir, seq)	0
 #endif
 
 static bool tcp_in_window(const struct nf_conn *ct,
@@ -541,7 +541,7 @@ static bool tcp_in_window(const struct nf_conn *ct,
 		tcp_sack(skb, dataoff, tcph, &sack);
 
 	/* Take into account NAT sequence number mangling */
-	receiver_offset = NAT_OFFSET(pf, ct, !dir, ack - 1);
+	receiver_offset = NAT_OFFSET(ct, !dir, ack - 1);
 	ack -= receiver_offset;
 	sack -= receiver_offset;
 
diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c
index 590f0abaab8c..d5174902db37 100644
--- a/net/netfilter/nf_conntrack_sip.c
+++ b/net/netfilter/nf_conntrack_sip.c
@@ -946,11 +946,11 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int protoff,
 			break;
 #ifdef CONFIG_NF_NAT_NEEDED
 		if (exp->tuple.src.l3num == AF_INET && !direct_rtp &&
-		    (exp->saved_ip != exp->tuple.dst.u3.ip ||
+		    (exp->saved_addr.ip != exp->tuple.dst.u3.ip ||
 		     exp->saved_proto.udp.port != exp->tuple.dst.u.udp.port) &&
 		    ct->status & IPS_NAT_MASK) {
-			daddr->ip		= exp->saved_ip;
-			tuple.dst.u3.ip		= exp->saved_ip;
+			daddr->ip		= exp->saved_addr.ip;
+			tuple.dst.u3.ip		= exp->saved_addr.ip;
 			tuple.dst.u.udp.port	= exp->saved_proto.udp.port;
 			direct_rtp = 1;
 		} else
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
new file mode 100644
index 000000000000..c577b753fb9a
--- /dev/null
+++ b/net/netfilter/nf_nat_core.c
@@ -0,0 +1,854 @@
+/*
+ * (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
+ * (C) 2011 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/timer.h>
+#include <linux/skbuff.h>
+#include <linux/gfp.h>
+#include <net/xfrm.h>
+#include <linux/jhash.h>
+#include <linux/rtnetlink.h>
+
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_nat.h>
+#include <net/netfilter/nf_nat_l3proto.h>
+#include <net/netfilter/nf_nat_l4proto.h>
+#include <net/netfilter/nf_nat_core.h>
+#include <net/netfilter/nf_nat_helper.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_l3proto.h>
+#include <net/netfilter/nf_conntrack_zones.h>
+#include <linux/netfilter/nf_nat.h>
+
+static DEFINE_SPINLOCK(nf_nat_lock);
+
+static DEFINE_MUTEX(nf_nat_proto_mutex);
+static const struct nf_nat_l3proto __rcu *nf_nat_l3protos[NFPROTO_NUMPROTO]
+						__read_mostly;
+static const struct nf_nat_l4proto __rcu **nf_nat_l4protos[NFPROTO_NUMPROTO]
+						__read_mostly;
+
+
+inline const struct nf_nat_l3proto *
+__nf_nat_l3proto_find(u8 family)
+{
+	return rcu_dereference(nf_nat_l3protos[family]);
+}
+
+inline const struct nf_nat_l4proto *
+__nf_nat_l4proto_find(u8 family, u8 protonum)
+{
+	return rcu_dereference(nf_nat_l4protos[family][protonum]);
+}
+EXPORT_SYMBOL_GPL(__nf_nat_l4proto_find);
+
+#ifdef CONFIG_XFRM
+static void __nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl)
+{
+	const struct nf_nat_l3proto *l3proto;
+	const struct nf_conn *ct;
+	enum ip_conntrack_info ctinfo;
+	enum ip_conntrack_dir dir;
+	unsigned  long statusbit;
+	u8 family;
+
+	ct = nf_ct_get(skb, &ctinfo);
+	if (ct == NULL)
+		return;
+
+	family = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num;
+	rcu_read_lock();
+	l3proto = __nf_nat_l3proto_find(family);
+	if (l3proto == NULL)
+		goto out;
+
+	dir = CTINFO2DIR(ctinfo);
+	if (dir == IP_CT_DIR_ORIGINAL)
+		statusbit = IPS_DST_NAT;
+	else
+		statusbit = IPS_SRC_NAT;
+
+	l3proto->decode_session(skb, ct, dir, statusbit, fl);
+out:
+	rcu_read_unlock();
+}
+
+int nf_xfrm_me_harder(struct sk_buff *skb, unsigned int family)
+{
+	struct flowi fl;
+	unsigned int hh_len;
+	struct dst_entry *dst;
+
+	if (xfrm_decode_session(skb, &fl, family) < 0)
+		return -1;
+
+	dst = skb_dst(skb);
+	if (dst->xfrm)
+		dst = ((struct xfrm_dst *)dst)->route;
+	dst_hold(dst);
+
+	dst = xfrm_lookup(dev_net(dst->dev), dst, &fl, skb->sk, 0);
+	if (IS_ERR(dst))
+		return -1;
+
+	skb_dst_drop(skb);
+	skb_dst_set(skb, dst);
+
+	/* Change in oif may mean change in hh_len. */
+	hh_len = skb_dst(skb)->dev->hard_header_len;
+	if (skb_headroom(skb) < hh_len &&
+	    pskb_expand_head(skb, hh_len - skb_headroom(skb), 0, GFP_ATOMIC))
+		return -1;
+	return 0;
+}
+EXPORT_SYMBOL(nf_xfrm_me_harder);
+#endif /* CONFIG_XFRM */
+
+/* We keep an extra hash for each conntrack, for fast searching. */
+static inline unsigned int
+hash_by_src(const struct net *net, u16 zone,
+	    const struct nf_conntrack_tuple *tuple)
+{
+	unsigned int hash;
+
+	/* Original src, to ensure we map it consistently if poss. */
+	hash = jhash2((u32 *)&tuple->src, sizeof(tuple->src) / sizeof(u32),
+		      tuple->dst.protonum ^ zone ^ nf_conntrack_hash_rnd);
+	return ((u64)hash * net->ct.nat_htable_size) >> 32;
+}
+
+/* Is this tuple already taken? (not by us) */
+int
+nf_nat_used_tuple(const struct nf_conntrack_tuple *tuple,
+		  const struct nf_conn *ignored_conntrack)
+{
+	/* Conntrack tracking doesn't keep track of outgoing tuples; only
+	 * incoming ones.  NAT means they don't have a fixed mapping,
+	 * so we invert the tuple and look for the incoming reply.
+	 *
+	 * We could keep a separate hash if this proves too slow.
+	 */
+	struct nf_conntrack_tuple reply;
+
+	nf_ct_invert_tuplepr(&reply, tuple);
+	return nf_conntrack_tuple_taken(&reply, ignored_conntrack);
+}
+EXPORT_SYMBOL(nf_nat_used_tuple);
+
+/* If we source map this tuple so reply looks like reply_tuple, will
+ * that meet the constraints of range.
+ */
+static int in_range(const struct nf_nat_l3proto *l3proto,
+		    const struct nf_nat_l4proto *l4proto,
+		    const struct nf_conntrack_tuple *tuple,
+		    const struct nf_nat_range *range)
+{
+	/* If we are supposed to map IPs, then we must be in the
+	 * range specified, otherwise let this drag us onto a new src IP.
+	 */
+	if (range->flags & NF_NAT_RANGE_MAP_IPS &&
+	    !l3proto->in_range(tuple, range))
+		return 0;
+
+	if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED) ||
+	    l4proto->in_range(tuple, NF_NAT_MANIP_SRC,
+			      &range->min_proto, &range->max_proto))
+		return 1;
+
+	return 0;
+}
+
+static inline int
+same_src(const struct nf_conn *ct,
+	 const struct nf_conntrack_tuple *tuple)
+{
+	const struct nf_conntrack_tuple *t;
+
+	t = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
+	return (t->dst.protonum == tuple->dst.protonum &&
+		nf_inet_addr_cmp(&t->src.u3, &tuple->src.u3) &&
+		t->src.u.all == tuple->src.u.all);
+}
+
+/* Only called for SRC manip */
+static int
+find_appropriate_src(struct net *net, u16 zone,
+		     const struct nf_nat_l3proto *l3proto,
+		     const struct nf_nat_l4proto *l4proto,
+		     const struct nf_conntrack_tuple *tuple,
+		     struct nf_conntrack_tuple *result,
+		     const struct nf_nat_range *range)
+{
+	unsigned int h = hash_by_src(net, zone, tuple);
+	const struct nf_conn_nat *nat;
+	const struct nf_conn *ct;
+	const struct hlist_node *n;
+
+	hlist_for_each_entry_rcu(nat, n, &net->ct.nat_bysource[h], bysource) {
+		ct = nat->ct;
+		if (same_src(ct, tuple) && nf_ct_zone(ct) == zone) {
+			/* Copy source part from reply tuple. */
+			nf_ct_invert_tuplepr(result,
+				       &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
+			result->dst = tuple->dst;
+
+			if (in_range(l3proto, l4proto, result, range)) {
+				rcu_read_unlock();
+				return 1;
+			}
+		}
+	}
+	return 0;
+}
+
+/* For [FUTURE] fragmentation handling, we want the least-used
+ * src-ip/dst-ip/proto triple.  Fairness doesn't come into it.  Thus
+ * if the range specifies 1.2.3.4 ports 10000-10005 and 1.2.3.5 ports
+ * 1-65535, we don't do pro-rata allocation based on ports; we choose
+ * the ip with the lowest src-ip/dst-ip/proto usage.
+ */
+static void
+find_best_ips_proto(u16 zone, struct nf_conntrack_tuple *tuple,
+		    const struct nf_nat_range *range,
+		    const struct nf_conn *ct,
+		    enum nf_nat_manip_type maniptype)
+{
+	union nf_inet_addr *var_ipp;
+	unsigned int i, max;
+	/* Host order */
+	u32 minip, maxip, j, dist;
+	bool full_range;
+
+	/* No IP mapping?  Do nothing. */
+	if (!(range->flags & NF_NAT_RANGE_MAP_IPS))
+		return;
+
+	if (maniptype == NF_NAT_MANIP_SRC)
+		var_ipp = &tuple->src.u3;
+	else
+		var_ipp = &tuple->dst.u3;
+
+	/* Fast path: only one choice. */
+	if (nf_inet_addr_cmp(&range->min_addr, &range->max_addr)) {
+		*var_ipp = range->min_addr;
+		return;
+	}
+
+	if (nf_ct_l3num(ct) == NFPROTO_IPV4)
+		max = sizeof(var_ipp->ip) / sizeof(u32) - 1;
+	else
+		max = sizeof(var_ipp->ip6) / sizeof(u32) - 1;
+
+	/* Hashing source and destination IPs gives a fairly even
+	 * spread in practice (if there are a small number of IPs
+	 * involved, there usually aren't that many connections
+	 * anyway).  The consistency means that servers see the same
+	 * client coming from the same IP (some Internet Banking sites
+	 * like this), even across reboots.
+	 */
+	j = jhash2((u32 *)&tuple->src.u3, sizeof(tuple->src.u3),
+		   range->flags & NF_NAT_RANGE_PERSISTENT ?
+			0 : (__force u32)tuple->dst.u3.all[max] ^ zone);
+
+	full_range = false;
+	for (i = 0; i <= max; i++) {
+		/* If first bytes of the address are at the maximum, use the
+		 * distance. Otherwise use the full range.
+		 */
+		if (!full_range) {
+			minip = ntohl((__force __be32)range->min_addr.all[i]);
+			maxip = ntohl((__force __be32)range->max_addr.all[i]);
+			dist  = maxip - minip + 1;
+		} else {
+			minip = 0;
+			dist  = ~0;
+		}
+
+		var_ipp->all[i] = (__force __u32)
+			htonl(minip + (((u64)j * dist) >> 32));
+		if (var_ipp->all[i] != range->max_addr.all[i])
+			full_range = true;
+
+		if (!(range->flags & NF_NAT_RANGE_PERSISTENT))
+			j ^= (__force u32)tuple->dst.u3.all[i];
+	}
+}
+
+/* Manipulate the tuple into the range given. For NF_INET_POST_ROUTING,
+ * we change the source to map into the range. For NF_INET_PRE_ROUTING
+ * and NF_INET_LOCAL_OUT, we change the destination to map into the
+ * range. It might not be possible to get a unique tuple, but we try.
+ * At worst (or if we race), we will end up with a final duplicate in
+ * __ip_conntrack_confirm and drop the packet. */
+static void
+get_unique_tuple(struct nf_conntrack_tuple *tuple,
+		 const struct nf_conntrack_tuple *orig_tuple,
+		 const struct nf_nat_range *range,
+		 struct nf_conn *ct,
+		 enum nf_nat_manip_type maniptype)
+{
+	const struct nf_nat_l3proto *l3proto;
+	const struct nf_nat_l4proto *l4proto;
+	struct net *net = nf_ct_net(ct);
+	u16 zone = nf_ct_zone(ct);
+
+	rcu_read_lock();
+	l3proto = __nf_nat_l3proto_find(orig_tuple->src.l3num);
+	l4proto = __nf_nat_l4proto_find(orig_tuple->src.l3num,
+					orig_tuple->dst.protonum);
+
+	/* 1) If this srcip/proto/src-proto-part is currently mapped,
+	 * and that same mapping gives a unique tuple within the given
+	 * range, use that.
+	 *
+	 * This is only required for source (ie. NAT/masq) mappings.
+	 * So far, we don't do local source mappings, so multiple
+	 * manips not an issue.
+	 */
+	if (maniptype == NF_NAT_MANIP_SRC &&
+	    !(range->flags & NF_NAT_RANGE_PROTO_RANDOM)) {
+		/* try the original tuple first */
+		if (in_range(l3proto, l4proto, orig_tuple, range)) {
+			if (!nf_nat_used_tuple(orig_tuple, ct)) {
+				*tuple = *orig_tuple;
+				goto out;
+			}
+		} else if (find_appropriate_src(net, zone, l3proto, l4proto,
+						orig_tuple, tuple, range)) {
+			pr_debug("get_unique_tuple: Found current src map\n");
+			if (!nf_nat_used_tuple(tuple, ct))
+				goto out;
+		}
+	}
+
+	/* 2) Select the least-used IP/proto combination in the given range */
+	*tuple = *orig_tuple;
+	find_best_ips_proto(zone, tuple, range, ct, maniptype);
+
+	/* 3) The per-protocol part of the manip is made to map into
+	 * the range to make a unique tuple.
+	 */
+
+	/* Only bother mapping if it's not already in range and unique */
+	if (!(range->flags & NF_NAT_RANGE_PROTO_RANDOM)) {
+		if (range->flags & NF_NAT_RANGE_PROTO_SPECIFIED) {
+			if (l4proto->in_range(tuple, maniptype,
+					      &range->min_proto,
+					      &range->max_proto) &&
+			    (range->min_proto.all == range->max_proto.all ||
+			     !nf_nat_used_tuple(tuple, ct)))
+				goto out;
+		} else if (!nf_nat_used_tuple(tuple, ct)) {
+			goto out;
+		}
+	}
+
+	/* Last change: get protocol to try to obtain unique tuple. */
+	l4proto->unique_tuple(l3proto, tuple, range, maniptype, ct);
+out:
+	rcu_read_unlock();
+}
+
+unsigned int
+nf_nat_setup_info(struct nf_conn *ct,
+		  const struct nf_nat_range *range,
+		  enum nf_nat_manip_type maniptype)
+{
+	struct net *net = nf_ct_net(ct);
+	struct nf_conntrack_tuple curr_tuple, new_tuple;
+	struct nf_conn_nat *nat;
+
+	/* nat helper or nfctnetlink also setup binding */
+	nat = nfct_nat(ct);
+	if (!nat) {
+		nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC);
+		if (nat == NULL) {
+			pr_debug("failed to add NAT extension\n");
+			return NF_ACCEPT;
+		}
+	}
+
+	NF_CT_ASSERT(maniptype == NF_NAT_MANIP_SRC ||
+		     maniptype == NF_NAT_MANIP_DST);
+	BUG_ON(nf_nat_initialized(ct, maniptype));
+
+	/* What we've got will look like inverse of reply. Normally
+	 * this is what is in the conntrack, except for prior
+	 * manipulations (future optimization: if num_manips == 0,
+	 * orig_tp = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple)
+	 */
+	nf_ct_invert_tuplepr(&curr_tuple,
+			     &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
+
+	get_unique_tuple(&new_tuple, &curr_tuple, range, ct, maniptype);
+
+	if (!nf_ct_tuple_equal(&new_tuple, &curr_tuple)) {
+		struct nf_conntrack_tuple reply;
+
+		/* Alter conntrack table so will recognize replies. */
+		nf_ct_invert_tuplepr(&reply, &new_tuple);
+		nf_conntrack_alter_reply(ct, &reply);
+
+		/* Non-atomic: we own this at the moment. */
+		if (maniptype == NF_NAT_MANIP_SRC)
+			ct->status |= IPS_SRC_NAT;
+		else
+			ct->status |= IPS_DST_NAT;
+	}
+
+	if (maniptype == NF_NAT_MANIP_SRC) {
+		unsigned int srchash;
+
+		srchash = hash_by_src(net, nf_ct_zone(ct),
+				      &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
+		spin_lock_bh(&nf_nat_lock);
+		/* nf_conntrack_alter_reply might re-allocate extension aera */
+		nat = nfct_nat(ct);
+		nat->ct = ct;
+		hlist_add_head_rcu(&nat->bysource,
+				   &net->ct.nat_bysource[srchash]);
+		spin_unlock_bh(&nf_nat_lock);
+	}
+
+	/* It's done. */
+	if (maniptype == NF_NAT_MANIP_DST)
+		ct->status |= IPS_DST_NAT_DONE;
+	else
+		ct->status |= IPS_SRC_NAT_DONE;
+
+	return NF_ACCEPT;
+}
+EXPORT_SYMBOL(nf_nat_setup_info);
+
+/* Do packet manipulations according to nf_nat_setup_info. */
+unsigned int nf_nat_packet(struct nf_conn *ct,
+			   enum ip_conntrack_info ctinfo,
+			   unsigned int hooknum,
+			   struct sk_buff *skb)
+{
+	const struct nf_nat_l3proto *l3proto;
+	const struct nf_nat_l4proto *l4proto;
+	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+	unsigned long statusbit;
+	enum nf_nat_manip_type mtype = HOOK2MANIP(hooknum);
+
+	if (mtype == NF_NAT_MANIP_SRC)
+		statusbit = IPS_SRC_NAT;
+	else
+		statusbit = IPS_DST_NAT;
+
+	/* Invert if this is reply dir. */
+	if (dir == IP_CT_DIR_REPLY)
+		statusbit ^= IPS_NAT_MASK;
+
+	/* Non-atomic: these bits don't change. */
+	if (ct->status & statusbit) {
+		struct nf_conntrack_tuple target;
+
+		/* We are aiming to look like inverse of other direction. */
+		nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple);
+
+		l3proto = __nf_nat_l3proto_find(target.src.l3num);
+		l4proto = __nf_nat_l4proto_find(target.src.l3num,
+						target.dst.protonum);
+		if (!l3proto->manip_pkt(skb, 0, l4proto, &target, mtype))
+			return NF_DROP;
+	}
+	return NF_ACCEPT;
+}
+EXPORT_SYMBOL_GPL(nf_nat_packet);
+
+struct nf_nat_proto_clean {
+	u8	l3proto;
+	u8	l4proto;
+	bool	hash;
+};
+
+/* Clear NAT section of all conntracks, in case we're loaded again. */
+static int nf_nat_proto_clean(struct nf_conn *i, void *data)
+{
+	const struct nf_nat_proto_clean *clean = data;
+	struct nf_conn_nat *nat = nfct_nat(i);
+
+	if (!nat)
+		return 0;
+	if ((clean->l3proto && nf_ct_l3num(i) != clean->l3proto) ||
+	    (clean->l4proto && nf_ct_protonum(i) != clean->l4proto))
+		return 0;
+
+	if (clean->hash) {
+		spin_lock_bh(&nf_nat_lock);
+		hlist_del_rcu(&nat->bysource);
+		spin_unlock_bh(&nf_nat_lock);
+	} else {
+		memset(nat, 0, sizeof(*nat));
+		i->status &= ~(IPS_NAT_MASK | IPS_NAT_DONE_MASK |
+			       IPS_SEQ_ADJUST);
+	}
+	return 0;
+}
+
+static void nf_nat_l4proto_clean(u8 l3proto, u8 l4proto)
+{
+	struct nf_nat_proto_clean clean = {
+		.l3proto = l3proto,
+		.l4proto = l4proto,
+	};
+	struct net *net;
+
+	rtnl_lock();
+	/* Step 1 - remove from bysource hash */
+	clean.hash = true;
+	for_each_net(net)
+		nf_ct_iterate_cleanup(net, nf_nat_proto_clean, &clean);
+	synchronize_rcu();
+
+	/* Step 2 - clean NAT section */
+	clean.hash = false;
+	for_each_net(net)
+		nf_ct_iterate_cleanup(net, nf_nat_proto_clean, &clean);
+	rtnl_unlock();
+}
+
+static void nf_nat_l3proto_clean(u8 l3proto)
+{
+	struct nf_nat_proto_clean clean = {
+		.l3proto = l3proto,
+	};
+	struct net *net;
+
+	rtnl_lock();
+	/* Step 1 - remove from bysource hash */
+	clean.hash = true;
+	for_each_net(net)
+		nf_ct_iterate_cleanup(net, nf_nat_proto_clean, &clean);
+	synchronize_rcu();
+
+	/* Step 2 - clean NAT section */
+	clean.hash = false;
+	for_each_net(net)
+		nf_ct_iterate_cleanup(net, nf_nat_proto_clean, &clean);
+	rtnl_unlock();
+}
+
+/* Protocol registration. */
+int nf_nat_l4proto_register(u8 l3proto, const struct nf_nat_l4proto *l4proto)
+{
+	const struct nf_nat_l4proto **l4protos;
+	unsigned int i;
+	int ret = 0;
+
+	mutex_lock(&nf_nat_proto_mutex);
+	if (nf_nat_l4protos[l3proto] == NULL) {
+		l4protos = kmalloc(IPPROTO_MAX * sizeof(struct nf_nat_l4proto *),
+				   GFP_KERNEL);
+		if (l4protos == NULL) {
+			ret = -ENOMEM;
+			goto out;
+		}
+
+		for (i = 0; i < IPPROTO_MAX; i++)
+			RCU_INIT_POINTER(l4protos[i], &nf_nat_l4proto_unknown);
+
+		/* Before making proto_array visible to lockless readers,
+		 * we must make sure its content is committed to memory.
+		 */
+		smp_wmb();
+
+		nf_nat_l4protos[l3proto] = l4protos;
+	}
+
+	if (rcu_dereference_protected(
+			nf_nat_l4protos[l3proto][l4proto->l4proto],
+			lockdep_is_held(&nf_nat_proto_mutex)
+			) != &nf_nat_l4proto_unknown) {
+		ret = -EBUSY;
+		goto out;
+	}
+	RCU_INIT_POINTER(nf_nat_l4protos[l3proto][l4proto->l4proto], l4proto);
+ out:
+	mutex_unlock(&nf_nat_proto_mutex);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(nf_nat_l4proto_register);
+
+/* No one stores the protocol anywhere; simply delete it. */
+void nf_nat_l4proto_unregister(u8 l3proto, const struct nf_nat_l4proto *l4proto)
+{
+	mutex_lock(&nf_nat_proto_mutex);
+	RCU_INIT_POINTER(nf_nat_l4protos[l3proto][l4proto->l4proto],
+			 &nf_nat_l4proto_unknown);
+	mutex_unlock(&nf_nat_proto_mutex);
+	synchronize_rcu();
+
+	nf_nat_l4proto_clean(l3proto, l4proto->l4proto);
+}
+EXPORT_SYMBOL_GPL(nf_nat_l4proto_unregister);
+
+int nf_nat_l3proto_register(const struct nf_nat_l3proto *l3proto)
+{
+	int err;
+
+	err = nf_ct_l3proto_try_module_get(l3proto->l3proto);
+	if (err < 0)
+		return err;
+
+	mutex_lock(&nf_nat_proto_mutex);
+	RCU_INIT_POINTER(nf_nat_l4protos[l3proto->l3proto][IPPROTO_TCP],
+			 &nf_nat_l4proto_tcp);
+	RCU_INIT_POINTER(nf_nat_l4protos[l3proto->l3proto][IPPROTO_UDP],
+			 &nf_nat_l4proto_udp);
+	mutex_unlock(&nf_nat_proto_mutex);
+
+	RCU_INIT_POINTER(nf_nat_l3protos[l3proto->l3proto], l3proto);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(nf_nat_l3proto_register);
+
+void nf_nat_l3proto_unregister(const struct nf_nat_l3proto *l3proto)
+{
+	mutex_lock(&nf_nat_proto_mutex);
+	RCU_INIT_POINTER(nf_nat_l3protos[l3proto->l3proto], NULL);
+	mutex_unlock(&nf_nat_proto_mutex);
+	synchronize_rcu();
+
+	nf_nat_l3proto_clean(l3proto->l3proto);
+	nf_ct_l3proto_module_put(l3proto->l3proto);
+}
+EXPORT_SYMBOL_GPL(nf_nat_l3proto_unregister);
+
+/* No one using conntrack by the time this called. */
+static void nf_nat_cleanup_conntrack(struct nf_conn *ct)
+{
+	struct nf_conn_nat *nat = nf_ct_ext_find(ct, NF_CT_EXT_NAT);
+
+	if (nat == NULL || nat->ct == NULL)
+		return;
+
+	NF_CT_ASSERT(nat->ct->status & IPS_SRC_NAT_DONE);
+
+	spin_lock_bh(&nf_nat_lock);
+	hlist_del_rcu(&nat->bysource);
+	spin_unlock_bh(&nf_nat_lock);
+}
+
+static void nf_nat_move_storage(void *new, void *old)
+{
+	struct nf_conn_nat *new_nat = new;
+	struct nf_conn_nat *old_nat = old;
+	struct nf_conn *ct = old_nat->ct;
+
+	if (!ct || !(ct->status & IPS_SRC_NAT_DONE))
+		return;
+
+	spin_lock_bh(&nf_nat_lock);
+	hlist_replace_rcu(&old_nat->bysource, &new_nat->bysource);
+	spin_unlock_bh(&nf_nat_lock);
+}
+
+static struct nf_ct_ext_type nat_extend __read_mostly = {
+	.len		= sizeof(struct nf_conn_nat),
+	.align		= __alignof__(struct nf_conn_nat),
+	.destroy	= nf_nat_cleanup_conntrack,
+	.move		= nf_nat_move_storage,
+	.id		= NF_CT_EXT_NAT,
+	.flags		= NF_CT_EXT_F_PREALLOC,
+};
+
+#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
+
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nfnetlink_conntrack.h>
+
+static const struct nla_policy protonat_nla_policy[CTA_PROTONAT_MAX+1] = {
+	[CTA_PROTONAT_PORT_MIN]	= { .type = NLA_U16 },
+	[CTA_PROTONAT_PORT_MAX]	= { .type = NLA_U16 },
+};
+
+static int nfnetlink_parse_nat_proto(struct nlattr *attr,
+				     const struct nf_conn *ct,
+				     struct nf_nat_range *range)
+{
+	struct nlattr *tb[CTA_PROTONAT_MAX+1];
+	const struct nf_nat_l4proto *l4proto;
+	int err;
+
+	err = nla_parse_nested(tb, CTA_PROTONAT_MAX, attr, protonat_nla_policy);
+	if (err < 0)
+		return err;
+
+	l4proto = __nf_nat_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
+	if (l4proto->nlattr_to_range)
+		err = l4proto->nlattr_to_range(tb, range);
+
+	return err;
+}
+
+static const struct nla_policy nat_nla_policy[CTA_NAT_MAX+1] = {
+	[CTA_NAT_V4_MINIP]	= { .type = NLA_U32 },
+	[CTA_NAT_V4_MAXIP]	= { .type = NLA_U32 },
+	[CTA_NAT_PROTO]		= { .type = NLA_NESTED },
+};
+
+static int
+nfnetlink_parse_nat(const struct nlattr *nat,
+		    const struct nf_conn *ct, struct nf_nat_range *range)
+{
+	const struct nf_nat_l3proto *l3proto;
+	struct nlattr *tb[CTA_NAT_MAX+1];
+	int err;
+
+	memset(range, 0, sizeof(*range));
+
+	err = nla_parse_nested(tb, CTA_NAT_MAX, nat, nat_nla_policy);
+	if (err < 0)
+		return err;
+
+	rcu_read_lock();
+	l3proto = __nf_nat_l3proto_find(nf_ct_l3num(ct));
+	if (l3proto == NULL) {
+		err = -EAGAIN;
+		goto out;
+	}
+	err = l3proto->nlattr_to_range(tb, range);
+	if (err < 0)
+		goto out;
+
+	if (!tb[CTA_NAT_PROTO])
+		goto out;
+
+	err = nfnetlink_parse_nat_proto(tb[CTA_NAT_PROTO], ct, range);
+out:
+	rcu_read_unlock();
+	return err;
+}
+
+static int
+nfnetlink_parse_nat_setup(struct nf_conn *ct,
+			  enum nf_nat_manip_type manip,
+			  const struct nlattr *attr)
+{
+	struct nf_nat_range range;
+	int err;
+
+	err = nfnetlink_parse_nat(attr, ct, &range);
+	if (err < 0)
+		return err;
+	if (nf_nat_initialized(ct, manip))
+		return -EEXIST;
+
+	return nf_nat_setup_info(ct, &range, manip);
+}
+#else
+static int
+nfnetlink_parse_nat_setup(struct nf_conn *ct,
+			  enum nf_nat_manip_type manip,
+			  const struct nlattr *attr)
+{
+	return -EOPNOTSUPP;
+}
+#endif
+
+static int __net_init nf_nat_net_init(struct net *net)
+{
+	/* Leave them the same for the moment. */
+	net->ct.nat_htable_size = net->ct.htable_size;
+	net->ct.nat_bysource = nf_ct_alloc_hashtable(&net->ct.nat_htable_size, 0);
+	if (!net->ct.nat_bysource)
+		return -ENOMEM;
+	return 0;
+}
+
+static void __net_exit nf_nat_net_exit(struct net *net)
+{
+	struct nf_nat_proto_clean clean = {};
+
+	nf_ct_iterate_cleanup(net, &nf_nat_proto_clean, &clean);
+	synchronize_rcu();
+	nf_ct_free_hashtable(net->ct.nat_bysource, net->ct.nat_htable_size);
+}
+
+static struct pernet_operations nf_nat_net_ops = {
+	.init = nf_nat_net_init,
+	.exit = nf_nat_net_exit,
+};
+
+static struct nf_ct_helper_expectfn follow_master_nat = {
+	.name		= "nat-follow-master",
+	.expectfn	= nf_nat_follow_master,
+};
+
+static struct nfq_ct_nat_hook nfq_ct_nat = {
+	.seq_adjust	= nf_nat_tcp_seq_adjust,
+};
+
+static int __init nf_nat_init(void)
+{
+	int ret;
+
+	ret = nf_ct_extend_register(&nat_extend);
+	if (ret < 0) {
+		printk(KERN_ERR "nf_nat_core: Unable to register extension\n");
+		return ret;
+	}
+
+	ret = register_pernet_subsys(&nf_nat_net_ops);
+	if (ret < 0)
+		goto cleanup_extend;
+
+	nf_ct_helper_expectfn_register(&follow_master_nat);
+
+	/* Initialize fake conntrack so that NAT will skip it */
+	nf_ct_untracked_status_or(IPS_NAT_DONE_MASK);
+
+	BUG_ON(nf_nat_seq_adjust_hook != NULL);
+	RCU_INIT_POINTER(nf_nat_seq_adjust_hook, nf_nat_seq_adjust);
+	BUG_ON(nfnetlink_parse_nat_setup_hook != NULL);
+	RCU_INIT_POINTER(nfnetlink_parse_nat_setup_hook,
+			   nfnetlink_parse_nat_setup);
+	BUG_ON(nf_ct_nat_offset != NULL);
+	RCU_INIT_POINTER(nf_ct_nat_offset, nf_nat_get_offset);
+	RCU_INIT_POINTER(nfq_ct_nat_hook, &nfq_ct_nat);
+#ifdef CONFIG_XFRM
+	BUG_ON(nf_nat_decode_session_hook != NULL);
+	RCU_INIT_POINTER(nf_nat_decode_session_hook, __nf_nat_decode_session);
+#endif
+	return 0;
+
+ cleanup_extend:
+	nf_ct_extend_unregister(&nat_extend);
+	return ret;
+}
+
+static void __exit nf_nat_cleanup(void)
+{
+	unsigned int i;
+
+	unregister_pernet_subsys(&nf_nat_net_ops);
+	nf_ct_extend_unregister(&nat_extend);
+	nf_ct_helper_expectfn_unregister(&follow_master_nat);
+	RCU_INIT_POINTER(nf_nat_seq_adjust_hook, NULL);
+	RCU_INIT_POINTER(nfnetlink_parse_nat_setup_hook, NULL);
+	RCU_INIT_POINTER(nf_ct_nat_offset, NULL);
+	RCU_INIT_POINTER(nfq_ct_nat_hook, NULL);
+#ifdef CONFIG_XFRM
+	RCU_INIT_POINTER(nf_nat_decode_session_hook, NULL);
+#endif
+	for (i = 0; i < NFPROTO_NUMPROTO; i++)
+		kfree(nf_nat_l4protos[i]);
+	synchronize_net();
+}
+
+MODULE_LICENSE("GPL");
+
+module_init(nf_nat_init);
+module_exit(nf_nat_cleanup);
diff --git a/net/netfilter/nf_nat_helper.c b/net/netfilter/nf_nat_helper.c
new file mode 100644
index 000000000000..23c2b38676a6
--- /dev/null
+++ b/net/netfilter/nf_nat_helper.c
@@ -0,0 +1,435 @@
+/* nf_nat_helper.c - generic support functions for NAT helpers
+ *
+ * (C) 2000-2002 Harald Welte <laforge@netfilter.org>
+ * (C) 2003-2006 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/module.h>
+#include <linux/gfp.h>
+#include <linux/types.h>
+#include <linux/skbuff.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include <net/tcp.h>
+
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_ecache.h>
+#include <net/netfilter/nf_conntrack_expect.h>
+#include <net/netfilter/nf_nat.h>
+#include <net/netfilter/nf_nat_l3proto.h>
+#include <net/netfilter/nf_nat_l4proto.h>
+#include <net/netfilter/nf_nat_core.h>
+#include <net/netfilter/nf_nat_helper.h>
+
+#define DUMP_OFFSET(x) \
+	pr_debug("offset_before=%d, offset_after=%d, correction_pos=%u\n", \
+		 x->offset_before, x->offset_after, x->correction_pos);
+
+static DEFINE_SPINLOCK(nf_nat_seqofs_lock);
+
+/* Setup TCP sequence correction given this change at this sequence */
+static inline void
+adjust_tcp_sequence(u32 seq,
+		    int sizediff,
+		    struct nf_conn *ct,
+		    enum ip_conntrack_info ctinfo)
+{
+	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+	struct nf_conn_nat *nat = nfct_nat(ct);
+	struct nf_nat_seq *this_way = &nat->seq[dir];
+
+	pr_debug("adjust_tcp_sequence: seq = %u, sizediff = %d\n",
+		 seq, sizediff);
+
+	pr_debug("adjust_tcp_sequence: Seq_offset before: ");
+	DUMP_OFFSET(this_way);
+
+	spin_lock_bh(&nf_nat_seqofs_lock);
+
+	/* SYN adjust. If it's uninitialized, or this is after last
+	 * correction, record it: we don't handle more than one
+	 * adjustment in the window, but do deal with common case of a
+	 * retransmit */
+	if (this_way->offset_before == this_way->offset_after ||
+	    before(this_way->correction_pos, seq)) {
+		this_way->correction_pos = seq;
+		this_way->offset_before = this_way->offset_after;
+		this_way->offset_after += sizediff;
+	}
+	spin_unlock_bh(&nf_nat_seqofs_lock);
+
+	pr_debug("adjust_tcp_sequence: Seq_offset after: ");
+	DUMP_OFFSET(this_way);
+}
+
+/* Get the offset value, for conntrack */
+s16 nf_nat_get_offset(const struct nf_conn *ct,
+		      enum ip_conntrack_dir dir,
+		      u32 seq)
+{
+	struct nf_conn_nat *nat = nfct_nat(ct);
+	struct nf_nat_seq *this_way;
+	s16 offset;
+
+	if (!nat)
+		return 0;
+
+	this_way = &nat->seq[dir];
+	spin_lock_bh(&nf_nat_seqofs_lock);
+	offset = after(seq, this_way->correction_pos)
+		 ? this_way->offset_after : this_way->offset_before;
+	spin_unlock_bh(&nf_nat_seqofs_lock);
+
+	return offset;
+}
+
+/* Frobs data inside this packet, which is linear. */
+static void mangle_contents(struct sk_buff *skb,
+			    unsigned int dataoff,
+			    unsigned int match_offset,
+			    unsigned int match_len,
+			    const char *rep_buffer,
+			    unsigned int rep_len)
+{
+	unsigned char *data;
+
+	BUG_ON(skb_is_nonlinear(skb));
+	data = skb_network_header(skb) + dataoff;
+
+	/* move post-replacement */
+	memmove(data + match_offset + rep_len,
+		data + match_offset + match_len,
+		skb->tail - (skb->network_header + dataoff +
+			     match_offset + match_len));
+
+	/* insert data from buffer */
+	memcpy(data + match_offset, rep_buffer, rep_len);
+
+	/* update skb info */
+	if (rep_len > match_len) {
+		pr_debug("nf_nat_mangle_packet: Extending packet by "
+			 "%u from %u bytes\n", rep_len - match_len, skb->len);
+		skb_put(skb, rep_len - match_len);
+	} else {
+		pr_debug("nf_nat_mangle_packet: Shrinking packet from "
+			 "%u from %u bytes\n", match_len - rep_len, skb->len);
+		__skb_trim(skb, skb->len + rep_len - match_len);
+	}
+
+	if (nf_ct_l3num((struct nf_conn *)skb->nfct) == NFPROTO_IPV4) {
+		/* fix IP hdr checksum information */
+		ip_hdr(skb)->tot_len = htons(skb->len);
+		ip_send_check(ip_hdr(skb));
+	} else
+		ipv6_hdr(skb)->payload_len =
+			htons(skb->len - sizeof(struct ipv6hdr));
+}
+
+/* Unusual, but possible case. */
+static int enlarge_skb(struct sk_buff *skb, unsigned int extra)
+{
+	if (skb->len + extra > 65535)
+		return 0;
+
+	if (pskb_expand_head(skb, 0, extra - skb_tailroom(skb), GFP_ATOMIC))
+		return 0;
+
+	return 1;
+}
+
+void nf_nat_set_seq_adjust(struct nf_conn *ct, enum ip_conntrack_info ctinfo,
+			   __be32 seq, s16 off)
+{
+	if (!off)
+		return;
+	set_bit(IPS_SEQ_ADJUST_BIT, &ct->status);
+	adjust_tcp_sequence(ntohl(seq), off, ct, ctinfo);
+	nf_conntrack_event_cache(IPCT_NATSEQADJ, ct);
+}
+EXPORT_SYMBOL_GPL(nf_nat_set_seq_adjust);
+
+void nf_nat_tcp_seq_adjust(struct sk_buff *skb, struct nf_conn *ct,
+			   u32 ctinfo, int off)
+{
+	const struct tcphdr *th;
+
+	if (nf_ct_protonum(ct) != IPPROTO_TCP)
+		return;
+
+	th = (struct tcphdr *)(skb_network_header(skb)+ ip_hdrlen(skb));
+	nf_nat_set_seq_adjust(ct, ctinfo, th->seq, off);
+}
+EXPORT_SYMBOL_GPL(nf_nat_tcp_seq_adjust);
+
+/* Generic function for mangling variable-length address changes inside
+ * NATed TCP connections (like the PORT XXX,XXX,XXX,XXX,XXX,XXX
+ * command in FTP).
+ *
+ * Takes care about all the nasty sequence number changes, checksumming,
+ * skb enlargement, ...
+ *
+ * */
+int __nf_nat_mangle_tcp_packet(struct sk_buff *skb,
+			       struct nf_conn *ct,
+			       enum ip_conntrack_info ctinfo,
+			       unsigned int protoff,
+			       unsigned int match_offset,
+			       unsigned int match_len,
+			       const char *rep_buffer,
+			       unsigned int rep_len, bool adjust)
+{
+	const struct nf_nat_l3proto *l3proto;
+	struct tcphdr *tcph;
+	int oldlen, datalen;
+
+	if (!skb_make_writable(skb, skb->len))
+		return 0;
+
+	if (rep_len > match_len &&
+	    rep_len - match_len > skb_tailroom(skb) &&
+	    !enlarge_skb(skb, rep_len - match_len))
+		return 0;
+
+	SKB_LINEAR_ASSERT(skb);
+
+	tcph = (void *)skb->data + protoff;
+
+	oldlen = skb->len - protoff;
+	mangle_contents(skb, protoff + tcph->doff*4,
+			match_offset, match_len, rep_buffer, rep_len);
+
+	datalen = skb->len - protoff;
+
+	l3proto = __nf_nat_l3proto_find(nf_ct_l3num(ct));
+	l3proto->csum_recalc(skb, IPPROTO_TCP, tcph, &tcph->check,
+			     datalen, oldlen);
+
+	if (adjust && rep_len != match_len)
+		nf_nat_set_seq_adjust(ct, ctinfo, tcph->seq,
+				      (int)rep_len - (int)match_len);
+
+	return 1;
+}
+EXPORT_SYMBOL(__nf_nat_mangle_tcp_packet);
+
+/* Generic function for mangling variable-length address changes inside
+ * NATed UDP connections (like the CONNECT DATA XXXXX MESG XXXXX INDEX XXXXX
+ * command in the Amanda protocol)
+ *
+ * Takes care about all the nasty sequence number changes, checksumming,
+ * skb enlargement, ...
+ *
+ * XXX - This function could be merged with nf_nat_mangle_tcp_packet which
+ *       should be fairly easy to do.
+ */
+int
+nf_nat_mangle_udp_packet(struct sk_buff *skb,
+			 struct nf_conn *ct,
+			 enum ip_conntrack_info ctinfo,
+			 unsigned int protoff,
+			 unsigned int match_offset,
+			 unsigned int match_len,
+			 const char *rep_buffer,
+			 unsigned int rep_len)
+{
+	const struct nf_nat_l3proto *l3proto;
+	struct udphdr *udph;
+	int datalen, oldlen;
+
+	if (!skb_make_writable(skb, skb->len))
+		return 0;
+
+	if (rep_len > match_len &&
+	    rep_len - match_len > skb_tailroom(skb) &&
+	    !enlarge_skb(skb, rep_len - match_len))
+		return 0;
+
+	udph = (void *)skb->data + protoff;
+
+	oldlen = skb->len - protoff;
+	mangle_contents(skb, protoff + sizeof(*udph),
+			match_offset, match_len, rep_buffer, rep_len);
+
+	/* update the length of the UDP packet */
+	datalen = skb->len - protoff;
+	udph->len = htons(datalen);
+
+	/* fix udp checksum if udp checksum was previously calculated */
+	if (!udph->check && skb->ip_summed != CHECKSUM_PARTIAL)
+		return 1;
+
+	l3proto = __nf_nat_l3proto_find(nf_ct_l3num(ct));
+	l3proto->csum_recalc(skb, IPPROTO_UDP, udph, &udph->check,
+			     datalen, oldlen);
+
+	return 1;
+}
+EXPORT_SYMBOL(nf_nat_mangle_udp_packet);
+
+/* Adjust one found SACK option including checksum correction */
+static void
+sack_adjust(struct sk_buff *skb,
+	    struct tcphdr *tcph,
+	    unsigned int sackoff,
+	    unsigned int sackend,
+	    struct nf_nat_seq *natseq)
+{
+	while (sackoff < sackend) {
+		struct tcp_sack_block_wire *sack;
+		__be32 new_start_seq, new_end_seq;
+
+		sack = (void *)skb->data + sackoff;
+		if (after(ntohl(sack->start_seq) - natseq->offset_before,
+			  natseq->correction_pos))
+			new_start_seq = htonl(ntohl(sack->start_seq)
+					- natseq->offset_after);
+		else
+			new_start_seq = htonl(ntohl(sack->start_seq)
+					- natseq->offset_before);
+
+		if (after(ntohl(sack->end_seq) - natseq->offset_before,
+			  natseq->correction_pos))
+			new_end_seq = htonl(ntohl(sack->end_seq)
+				      - natseq->offset_after);
+		else
+			new_end_seq = htonl(ntohl(sack->end_seq)
+				      - natseq->offset_before);
+
+		pr_debug("sack_adjust: start_seq: %d->%d, end_seq: %d->%d\n",
+			 ntohl(sack->start_seq), new_start_seq,
+			 ntohl(sack->end_seq), new_end_seq);
+
+		inet_proto_csum_replace4(&tcph->check, skb,
+					 sack->start_seq, new_start_seq, 0);
+		inet_proto_csum_replace4(&tcph->check, skb,
+					 sack->end_seq, new_end_seq, 0);
+		sack->start_seq = new_start_seq;
+		sack->end_seq = new_end_seq;
+		sackoff += sizeof(*sack);
+	}
+}
+
+/* TCP SACK sequence number adjustment */
+static inline unsigned int
+nf_nat_sack_adjust(struct sk_buff *skb,
+		   unsigned int protoff,
+		   struct tcphdr *tcph,
+		   struct nf_conn *ct,
+		   enum ip_conntrack_info ctinfo)
+{
+	unsigned int dir, optoff, optend;
+	struct nf_conn_nat *nat = nfct_nat(ct);
+
+	optoff = protoff + sizeof(struct tcphdr);
+	optend = protoff + tcph->doff * 4;
+
+	if (!skb_make_writable(skb, optend))
+		return 0;
+
+	dir = CTINFO2DIR(ctinfo);
+
+	while (optoff < optend) {
+		/* Usually: option, length. */
+		unsigned char *op = skb->data + optoff;
+
+		switch (op[0]) {
+		case TCPOPT_EOL:
+			return 1;
+		case TCPOPT_NOP:
+			optoff++;
+			continue;
+		default:
+			/* no partial options */
+			if (optoff + 1 == optend ||
+			    optoff + op[1] > optend ||
+			    op[1] < 2)
+				return 0;
+			if (op[0] == TCPOPT_SACK &&
+			    op[1] >= 2+TCPOLEN_SACK_PERBLOCK &&
+			    ((op[1] - 2) % TCPOLEN_SACK_PERBLOCK) == 0)
+				sack_adjust(skb, tcph, optoff+2,
+					    optoff+op[1], &nat->seq[!dir]);
+			optoff += op[1];
+		}
+	}
+	return 1;
+}
+
+/* TCP sequence number adjustment.  Returns 1 on success, 0 on failure */
+int
+nf_nat_seq_adjust(struct sk_buff *skb,
+		  struct nf_conn *ct,
+		  enum ip_conntrack_info ctinfo,
+		  unsigned int protoff)
+{
+	struct tcphdr *tcph;
+	int dir;
+	__be32 newseq, newack;
+	s16 seqoff, ackoff;
+	struct nf_conn_nat *nat = nfct_nat(ct);
+	struct nf_nat_seq *this_way, *other_way;
+
+	dir = CTINFO2DIR(ctinfo);
+
+	this_way = &nat->seq[dir];
+	other_way = &nat->seq[!dir];
+
+	if (!skb_make_writable(skb, protoff + sizeof(*tcph)))
+		return 0;
+
+	tcph = (void *)skb->data + protoff;
+	if (after(ntohl(tcph->seq), this_way->correction_pos))
+		seqoff = this_way->offset_after;
+	else
+		seqoff = this_way->offset_before;
+
+	if (after(ntohl(tcph->ack_seq) - other_way->offset_before,
+		  other_way->correction_pos))
+		ackoff = other_way->offset_after;
+	else
+		ackoff = other_way->offset_before;
+
+	newseq = htonl(ntohl(tcph->seq) + seqoff);
+	newack = htonl(ntohl(tcph->ack_seq) - ackoff);
+
+	inet_proto_csum_replace4(&tcph->check, skb, tcph->seq, newseq, 0);
+	inet_proto_csum_replace4(&tcph->check, skb, tcph->ack_seq, newack, 0);
+
+	pr_debug("Adjusting sequence number from %u->%u, ack from %u->%u\n",
+		 ntohl(tcph->seq), ntohl(newseq), ntohl(tcph->ack_seq),
+		 ntohl(newack));
+
+	tcph->seq = newseq;
+	tcph->ack_seq = newack;
+
+	return nf_nat_sack_adjust(skb, protoff, tcph, ct, ctinfo);
+}
+
+/* Setup NAT on this expected conntrack so it follows master. */
+/* If we fail to get a free NAT slot, we'll get dropped on confirm */
+void nf_nat_follow_master(struct nf_conn *ct,
+			  struct nf_conntrack_expect *exp)
+{
+	struct nf_nat_range range;
+
+	/* This must be a fresh one. */
+	BUG_ON(ct->status & IPS_NAT_DONE_MASK);
+
+	/* Change src to where master sends to */
+	range.flags = NF_NAT_RANGE_MAP_IPS;
+	range.min_addr = range.max_addr
+		= ct->master->tuplehash[!exp->dir].tuple.dst.u3;
+	nf_nat_setup_info(ct, &range, NF_NAT_MANIP_SRC);
+
+	/* For DST manip, map port here to where it's expected. */
+	range.flags = (NF_NAT_RANGE_MAP_IPS | NF_NAT_RANGE_PROTO_SPECIFIED);
+	range.min_proto = range.max_proto = exp->saved_proto;
+	range.min_addr = range.max_addr
+		= ct->master->tuplehash[!exp->dir].tuple.src.u3;
+	nf_nat_setup_info(ct, &range, NF_NAT_MANIP_DST);
+}
+EXPORT_SYMBOL(nf_nat_follow_master);
diff --git a/net/netfilter/nf_nat_proto_common.c b/net/netfilter/nf_nat_proto_common.c
new file mode 100644
index 000000000000..9baaf734c142
--- /dev/null
+++ b/net/netfilter/nf_nat_proto_common.c
@@ -0,0 +1,112 @@
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
+ * (C) 2008 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/types.h>
+#include <linux/random.h>
+#include <linux/netfilter.h>
+#include <linux/export.h>
+
+#include <net/netfilter/nf_nat.h>
+#include <net/netfilter/nf_nat_core.h>
+#include <net/netfilter/nf_nat_l3proto.h>
+#include <net/netfilter/nf_nat_l4proto.h>
+
+bool nf_nat_l4proto_in_range(const struct nf_conntrack_tuple *tuple,
+			     enum nf_nat_manip_type maniptype,
+			     const union nf_conntrack_man_proto *min,
+			     const union nf_conntrack_man_proto *max)
+{
+	__be16 port;
+
+	if (maniptype == NF_NAT_MANIP_SRC)
+		port = tuple->src.u.all;
+	else
+		port = tuple->dst.u.all;
+
+	return ntohs(port) >= ntohs(min->all) &&
+	       ntohs(port) <= ntohs(max->all);
+}
+EXPORT_SYMBOL_GPL(nf_nat_l4proto_in_range);
+
+void nf_nat_l4proto_unique_tuple(const struct nf_nat_l3proto *l3proto,
+				 struct nf_conntrack_tuple *tuple,
+				 const struct nf_nat_range *range,
+				 enum nf_nat_manip_type maniptype,
+				 const struct nf_conn *ct,
+				 u16 *rover)
+{
+	unsigned int range_size, min, i;
+	__be16 *portptr;
+	u_int16_t off;
+
+	if (maniptype == NF_NAT_MANIP_SRC)
+		portptr = &tuple->src.u.all;
+	else
+		portptr = &tuple->dst.u.all;
+
+	/* If no range specified... */
+	if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED)) {
+		/* If it's dst rewrite, can't change port */
+		if (maniptype == NF_NAT_MANIP_DST)
+			return;
+
+		if (ntohs(*portptr) < 1024) {
+			/* Loose convention: >> 512 is credential passing */
+			if (ntohs(*portptr) < 512) {
+				min = 1;
+				range_size = 511 - min + 1;
+			} else {
+				min = 600;
+				range_size = 1023 - min + 1;
+			}
+		} else {
+			min = 1024;
+			range_size = 65535 - 1024 + 1;
+		}
+	} else {
+		min = ntohs(range->min_proto.all);
+		range_size = ntohs(range->max_proto.all) - min + 1;
+	}
+
+	if (range->flags & NF_NAT_RANGE_PROTO_RANDOM)
+		off = l3proto->secure_port(tuple, maniptype == NF_NAT_MANIP_SRC
+						  ? tuple->dst.u.all
+						  : tuple->src.u.all);
+	else
+		off = *rover;
+
+	for (i = 0; ; ++off) {
+		*portptr = htons(min + off % range_size);
+		if (++i != range_size && nf_nat_used_tuple(tuple, ct))
+			continue;
+		if (!(range->flags & NF_NAT_RANGE_PROTO_RANDOM))
+			*rover = off;
+		return;
+	}
+	return;
+}
+EXPORT_SYMBOL_GPL(nf_nat_l4proto_unique_tuple);
+
+#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
+int nf_nat_l4proto_nlattr_to_range(struct nlattr *tb[],
+				   struct nf_nat_range *range)
+{
+	if (tb[CTA_PROTONAT_PORT_MIN]) {
+		range->min_proto.all = nla_get_be16(tb[CTA_PROTONAT_PORT_MIN]);
+		range->max_proto.all = range->min_proto.all;
+		range->flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
+	}
+	if (tb[CTA_PROTONAT_PORT_MAX]) {
+		range->max_proto.all = nla_get_be16(tb[CTA_PROTONAT_PORT_MAX]);
+		range->flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
+	}
+	return 0;
+}
+EXPORT_SYMBOL_GPL(nf_nat_l4proto_nlattr_to_range);
+#endif
diff --git a/net/netfilter/nf_nat_proto_dccp.c b/net/netfilter/nf_nat_proto_dccp.c
new file mode 100644
index 000000000000..c8be2cdac0bf
--- /dev/null
+++ b/net/netfilter/nf_nat_proto_dccp.c
@@ -0,0 +1,116 @@
+/*
+ * DCCP NAT protocol helper
+ *
+ * Copyright (c) 2005, 2006, 2008 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/skbuff.h>
+#include <linux/dccp.h>
+
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_nat.h>
+#include <net/netfilter/nf_nat_l3proto.h>
+#include <net/netfilter/nf_nat_l4proto.h>
+
+static u_int16_t dccp_port_rover;
+
+static void
+dccp_unique_tuple(const struct nf_nat_l3proto *l3proto,
+		  struct nf_conntrack_tuple *tuple,
+		  const struct nf_nat_range *range,
+		  enum nf_nat_manip_type maniptype,
+		  const struct nf_conn *ct)
+{
+	nf_nat_l4proto_unique_tuple(l3proto, tuple, range, maniptype, ct,
+				    &dccp_port_rover);
+}
+
+static bool
+dccp_manip_pkt(struct sk_buff *skb,
+	       const struct nf_nat_l3proto *l3proto,
+	       unsigned int iphdroff, unsigned int hdroff,
+	       const struct nf_conntrack_tuple *tuple,
+	       enum nf_nat_manip_type maniptype)
+{
+	struct dccp_hdr *hdr;
+	__be16 *portptr, oldport, newport;
+	int hdrsize = 8; /* DCCP connection tracking guarantees this much */
+
+	if (skb->len >= hdroff + sizeof(struct dccp_hdr))
+		hdrsize = sizeof(struct dccp_hdr);
+
+	if (!skb_make_writable(skb, hdroff + hdrsize))
+		return false;
+
+	hdr = (struct dccp_hdr *)(skb->data + hdroff);
+
+	if (maniptype == NF_NAT_MANIP_SRC) {
+		newport = tuple->src.u.dccp.port;
+		portptr = &hdr->dccph_sport;
+	} else {
+		newport = tuple->dst.u.dccp.port;
+		portptr = &hdr->dccph_dport;
+	}
+
+	oldport = *portptr;
+	*portptr = newport;
+
+	if (hdrsize < sizeof(*hdr))
+		return true;
+
+	l3proto->csum_update(skb, iphdroff, &hdr->dccph_checksum,
+			     tuple, maniptype);
+	inet_proto_csum_replace2(&hdr->dccph_checksum, skb, oldport, newport,
+				 0);
+	return true;
+}
+
+static const struct nf_nat_l4proto nf_nat_l4proto_dccp = {
+	.l4proto		= IPPROTO_DCCP,
+	.manip_pkt		= dccp_manip_pkt,
+	.in_range		= nf_nat_l4proto_in_range,
+	.unique_tuple		= dccp_unique_tuple,
+#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
+	.nlattr_to_range	= nf_nat_l4proto_nlattr_to_range,
+#endif
+};
+
+static int __init nf_nat_proto_dccp_init(void)
+{
+	int err;
+
+	err = nf_nat_l4proto_register(NFPROTO_IPV4, &nf_nat_l4proto_dccp);
+	if (err < 0)
+		goto err1;
+	err = nf_nat_l4proto_register(NFPROTO_IPV6, &nf_nat_l4proto_dccp);
+	if (err < 0)
+		goto err2;
+	return 0;
+
+err2:
+	nf_nat_l4proto_unregister(NFPROTO_IPV4, &nf_nat_l4proto_dccp);
+err1:
+	return err;
+}
+
+static void __exit nf_nat_proto_dccp_fini(void)
+{
+	nf_nat_l4proto_unregister(NFPROTO_IPV6, &nf_nat_l4proto_dccp);
+	nf_nat_l4proto_unregister(NFPROTO_IPV4, &nf_nat_l4proto_dccp);
+
+}
+
+module_init(nf_nat_proto_dccp_init);
+module_exit(nf_nat_proto_dccp_fini);
+
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_DESCRIPTION("DCCP NAT protocol helper");
+MODULE_LICENSE("GPL");
diff --git a/net/netfilter/nf_nat_proto_sctp.c b/net/netfilter/nf_nat_proto_sctp.c
new file mode 100644
index 000000000000..e64faa5ca893
--- /dev/null
+++ b/net/netfilter/nf_nat_proto_sctp.c
@@ -0,0 +1,103 @@
+/*
+ * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/sctp.h>
+#include <linux/module.h>
+#include <net/sctp/checksum.h>
+
+#include <net/netfilter/nf_nat_l4proto.h>
+
+static u_int16_t nf_sctp_port_rover;
+
+static void
+sctp_unique_tuple(const struct nf_nat_l3proto *l3proto,
+		  struct nf_conntrack_tuple *tuple,
+		  const struct nf_nat_range *range,
+		  enum nf_nat_manip_type maniptype,
+		  const struct nf_conn *ct)
+{
+	nf_nat_l4proto_unique_tuple(l3proto, tuple, range, maniptype, ct,
+				    &nf_sctp_port_rover);
+}
+
+static bool
+sctp_manip_pkt(struct sk_buff *skb,
+	       const struct nf_nat_l3proto *l3proto,
+	       unsigned int iphdroff, unsigned int hdroff,
+	       const struct nf_conntrack_tuple *tuple,
+	       enum nf_nat_manip_type maniptype)
+{
+	struct sk_buff *frag;
+	sctp_sctphdr_t *hdr;
+	__be32 crc32;
+
+	if (!skb_make_writable(skb, hdroff + sizeof(*hdr)))
+		return false;
+
+	hdr = (struct sctphdr *)(skb->data + hdroff);
+
+	if (maniptype == NF_NAT_MANIP_SRC) {
+		/* Get rid of src port */
+		hdr->source = tuple->src.u.sctp.port;
+	} else {
+		/* Get rid of dst port */
+		hdr->dest = tuple->dst.u.sctp.port;
+	}
+
+	crc32 = sctp_start_cksum((u8 *)hdr, skb_headlen(skb) - hdroff);
+	skb_walk_frags(skb, frag)
+		crc32 = sctp_update_cksum((u8 *)frag->data, skb_headlen(frag),
+					  crc32);
+	crc32 = sctp_end_cksum(crc32);
+	hdr->checksum = crc32;
+
+	return true;
+}
+
+static const struct nf_nat_l4proto nf_nat_l4proto_sctp = {
+	.l4proto		= IPPROTO_SCTP,
+	.manip_pkt		= sctp_manip_pkt,
+	.in_range		= nf_nat_l4proto_in_range,
+	.unique_tuple		= sctp_unique_tuple,
+#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
+	.nlattr_to_range	= nf_nat_l4proto_nlattr_to_range,
+#endif
+};
+
+static int __init nf_nat_proto_sctp_init(void)
+{
+	int err;
+
+	err = nf_nat_l4proto_register(NFPROTO_IPV4, &nf_nat_l4proto_sctp);
+	if (err < 0)
+		goto err1;
+	err = nf_nat_l4proto_register(NFPROTO_IPV6, &nf_nat_l4proto_sctp);
+	if (err < 0)
+		goto err2;
+	return 0;
+
+err2:
+	nf_nat_l4proto_unregister(NFPROTO_IPV4, &nf_nat_l4proto_sctp);
+err1:
+	return err;
+}
+
+static void __exit nf_nat_proto_sctp_exit(void)
+{
+	nf_nat_l4proto_unregister(NFPROTO_IPV6, &nf_nat_l4proto_sctp);
+	nf_nat_l4proto_unregister(NFPROTO_IPV4, &nf_nat_l4proto_sctp);
+}
+
+module_init(nf_nat_proto_sctp_init);
+module_exit(nf_nat_proto_sctp_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("SCTP NAT protocol helper");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
diff --git a/net/netfilter/nf_nat_proto_tcp.c b/net/netfilter/nf_nat_proto_tcp.c
new file mode 100644
index 000000000000..83ec8a6e4c36
--- /dev/null
+++ b/net/netfilter/nf_nat_proto_tcp.c
@@ -0,0 +1,85 @@
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/export.h>
+#include <linux/tcp.h>
+
+#include <linux/netfilter.h>
+#include <linux/netfilter/nfnetlink_conntrack.h>
+#include <net/netfilter/nf_nat.h>
+#include <net/netfilter/nf_nat_l3proto.h>
+#include <net/netfilter/nf_nat_l4proto.h>
+#include <net/netfilter/nf_nat_core.h>
+
+static u16 tcp_port_rover;
+
+static void
+tcp_unique_tuple(const struct nf_nat_l3proto *l3proto,
+		 struct nf_conntrack_tuple *tuple,
+		 const struct nf_nat_range *range,
+		 enum nf_nat_manip_type maniptype,
+		 const struct nf_conn *ct)
+{
+	nf_nat_l4proto_unique_tuple(l3proto, tuple, range, maniptype, ct,
+				    &tcp_port_rover);
+}
+
+static bool
+tcp_manip_pkt(struct sk_buff *skb,
+	      const struct nf_nat_l3proto *l3proto,
+	      unsigned int iphdroff, unsigned int hdroff,
+	      const struct nf_conntrack_tuple *tuple,
+	      enum nf_nat_manip_type maniptype)
+{
+	struct tcphdr *hdr;
+	__be16 *portptr, newport, oldport;
+	int hdrsize = 8; /* TCP connection tracking guarantees this much */
+
+	/* this could be a inner header returned in icmp packet; in such
+	   cases we cannot update the checksum field since it is outside of
+	   the 8 bytes of transport layer headers we are guaranteed */
+	if (skb->len >= hdroff + sizeof(struct tcphdr))
+		hdrsize = sizeof(struct tcphdr);
+
+	if (!skb_make_writable(skb, hdroff + hdrsize))
+		return false;
+
+	hdr = (struct tcphdr *)(skb->data + hdroff);
+
+	if (maniptype == NF_NAT_MANIP_SRC) {
+		/* Get rid of src port */
+		newport = tuple->src.u.tcp.port;
+		portptr = &hdr->source;
+	} else {
+		/* Get rid of dst port */
+		newport = tuple->dst.u.tcp.port;
+		portptr = &hdr->dest;
+	}
+
+	oldport = *portptr;
+	*portptr = newport;
+
+	if (hdrsize < sizeof(*hdr))
+		return true;
+
+	l3proto->csum_update(skb, iphdroff, &hdr->check, tuple, maniptype);
+	inet_proto_csum_replace2(&hdr->check, skb, oldport, newport, 0);
+	return true;
+}
+
+const struct nf_nat_l4proto nf_nat_l4proto_tcp = {
+	.l4proto		= IPPROTO_TCP,
+	.manip_pkt		= tcp_manip_pkt,
+	.in_range		= nf_nat_l4proto_in_range,
+	.unique_tuple		= tcp_unique_tuple,
+#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
+	.nlattr_to_range	= nf_nat_l4proto_nlattr_to_range,
+#endif
+};
diff --git a/net/netfilter/nf_nat_proto_udp.c b/net/netfilter/nf_nat_proto_udp.c
new file mode 100644
index 000000000000..7df613fb34a2
--- /dev/null
+++ b/net/netfilter/nf_nat_proto_udp.c
@@ -0,0 +1,76 @@
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/types.h>
+#include <linux/export.h>
+#include <linux/init.h>
+#include <linux/udp.h>
+
+#include <linux/netfilter.h>
+#include <net/netfilter/nf_nat.h>
+#include <net/netfilter/nf_nat_core.h>
+#include <net/netfilter/nf_nat_l3proto.h>
+#include <net/netfilter/nf_nat_l4proto.h>
+
+static u16 udp_port_rover;
+
+static void
+udp_unique_tuple(const struct nf_nat_l3proto *l3proto,
+		 struct nf_conntrack_tuple *tuple,
+		 const struct nf_nat_range *range,
+		 enum nf_nat_manip_type maniptype,
+		 const struct nf_conn *ct)
+{
+	nf_nat_l4proto_unique_tuple(l3proto, tuple, range, maniptype, ct,
+				    &udp_port_rover);
+}
+
+static bool
+udp_manip_pkt(struct sk_buff *skb,
+	      const struct nf_nat_l3proto *l3proto,
+	      unsigned int iphdroff, unsigned int hdroff,
+	      const struct nf_conntrack_tuple *tuple,
+	      enum nf_nat_manip_type maniptype)
+{
+	struct udphdr *hdr;
+	__be16 *portptr, newport;
+
+	if (!skb_make_writable(skb, hdroff + sizeof(*hdr)))
+		return false;
+	hdr = (struct udphdr *)(skb->data + hdroff);
+
+	if (maniptype == NF_NAT_MANIP_SRC) {
+		/* Get rid of src port */
+		newport = tuple->src.u.udp.port;
+		portptr = &hdr->source;
+	} else {
+		/* Get rid of dst port */
+		newport = tuple->dst.u.udp.port;
+		portptr = &hdr->dest;
+	}
+	if (hdr->check || skb->ip_summed == CHECKSUM_PARTIAL) {
+		l3proto->csum_update(skb, iphdroff, &hdr->check,
+				     tuple, maniptype);
+		inet_proto_csum_replace2(&hdr->check, skb, *portptr, newport,
+					 0);
+		if (!hdr->check)
+			hdr->check = CSUM_MANGLED_0;
+	}
+	*portptr = newport;
+	return true;
+}
+
+const struct nf_nat_l4proto nf_nat_l4proto_udp = {
+	.l4proto		= IPPROTO_UDP,
+	.manip_pkt		= udp_manip_pkt,
+	.in_range		= nf_nat_l4proto_in_range,
+	.unique_tuple		= udp_unique_tuple,
+#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
+	.nlattr_to_range	= nf_nat_l4proto_nlattr_to_range,
+#endif
+};
diff --git a/net/netfilter/nf_nat_proto_udplite.c b/net/netfilter/nf_nat_proto_udplite.c
new file mode 100644
index 000000000000..776a0d1317b1
--- /dev/null
+++ b/net/netfilter/nf_nat_proto_udplite.c
@@ -0,0 +1,106 @@
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
+ * (C) 2008 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/udp.h>
+
+#include <linux/netfilter.h>
+#include <linux/module.h>
+#include <net/netfilter/nf_nat.h>
+#include <net/netfilter/nf_nat_l3proto.h>
+#include <net/netfilter/nf_nat_l4proto.h>
+
+static u16 udplite_port_rover;
+
+static void
+udplite_unique_tuple(const struct nf_nat_l3proto *l3proto,
+		     struct nf_conntrack_tuple *tuple,
+		     const struct nf_nat_range *range,
+		     enum nf_nat_manip_type maniptype,
+		     const struct nf_conn *ct)
+{
+	nf_nat_l4proto_unique_tuple(l3proto, tuple, range, maniptype, ct,
+				    &udplite_port_rover);
+}
+
+static bool
+udplite_manip_pkt(struct sk_buff *skb,
+		  const struct nf_nat_l3proto *l3proto,
+		  unsigned int iphdroff, unsigned int hdroff,
+		  const struct nf_conntrack_tuple *tuple,
+		  enum nf_nat_manip_type maniptype)
+{
+	struct udphdr *hdr;
+	__be16 *portptr, newport;
+
+	if (!skb_make_writable(skb, hdroff + sizeof(*hdr)))
+		return false;
+
+	hdr = (struct udphdr *)(skb->data + hdroff);
+
+	if (maniptype == NF_NAT_MANIP_SRC) {
+		/* Get rid of source port */
+		newport = tuple->src.u.udp.port;
+		portptr = &hdr->source;
+	} else {
+		/* Get rid of dst port */
+		newport = tuple->dst.u.udp.port;
+		portptr = &hdr->dest;
+	}
+
+	l3proto->csum_update(skb, iphdroff, &hdr->check, tuple, maniptype);
+	inet_proto_csum_replace2(&hdr->check, skb, *portptr, newport, 0);
+	if (!hdr->check)
+		hdr->check = CSUM_MANGLED_0;
+
+	*portptr = newport;
+	return true;
+}
+
+static const struct nf_nat_l4proto nf_nat_l4proto_udplite = {
+	.l4proto		= IPPROTO_UDPLITE,
+	.manip_pkt		= udplite_manip_pkt,
+	.in_range		= nf_nat_l4proto_in_range,
+	.unique_tuple		= udplite_unique_tuple,
+#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
+	.nlattr_to_range	= nf_nat_l4proto_nlattr_to_range,
+#endif
+};
+
+static int __init nf_nat_proto_udplite_init(void)
+{
+	int err;
+
+	err = nf_nat_l4proto_register(NFPROTO_IPV4, &nf_nat_l4proto_udplite);
+	if (err < 0)
+		goto err1;
+	err = nf_nat_l4proto_register(NFPROTO_IPV6, &nf_nat_l4proto_udplite);
+	if (err < 0)
+		goto err2;
+	return 0;
+
+err2:
+	nf_nat_l4proto_unregister(NFPROTO_IPV4, &nf_nat_l4proto_udplite);
+err1:
+	return err;
+}
+
+static void __exit nf_nat_proto_udplite_fini(void)
+{
+	nf_nat_l4proto_unregister(NFPROTO_IPV6, &nf_nat_l4proto_udplite);
+	nf_nat_l4proto_unregister(NFPROTO_IPV4, &nf_nat_l4proto_udplite);
+}
+
+module_init(nf_nat_proto_udplite_init);
+module_exit(nf_nat_proto_udplite_fini);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("UDP-Lite NAT protocol helper");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
diff --git a/net/netfilter/nf_nat_proto_unknown.c b/net/netfilter/nf_nat_proto_unknown.c
new file mode 100644
index 000000000000..6e494d584412
--- /dev/null
+++ b/net/netfilter/nf_nat_proto_unknown.c
@@ -0,0 +1,54 @@
+/* The "unknown" protocol.  This is what is used for protocols we
+ * don't understand.  It's returned by ip_ct_find_proto().
+ */
+
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/types.h>
+#include <linux/init.h>
+
+#include <linux/netfilter.h>
+#include <net/netfilter/nf_nat.h>
+#include <net/netfilter/nf_nat_l4proto.h>
+
+static bool unknown_in_range(const struct nf_conntrack_tuple *tuple,
+			     enum nf_nat_manip_type manip_type,
+			     const union nf_conntrack_man_proto *min,
+			     const union nf_conntrack_man_proto *max)
+{
+	return true;
+}
+
+static void unknown_unique_tuple(const struct nf_nat_l3proto *l3proto,
+				 struct nf_conntrack_tuple *tuple,
+				 const struct nf_nat_range *range,
+				 enum nf_nat_manip_type maniptype,
+				 const struct nf_conn *ct)
+{
+	/* Sorry: we can't help you; if it's not unique, we can't frob
+	 * anything.
+	 */
+	return;
+}
+
+static bool
+unknown_manip_pkt(struct sk_buff *skb,
+		  const struct nf_nat_l3proto *l3proto,
+		  unsigned int iphdroff, unsigned int hdroff,
+		  const struct nf_conntrack_tuple *tuple,
+		  enum nf_nat_manip_type maniptype)
+{
+	return true;
+}
+
+const struct nf_nat_l4proto nf_nat_l4proto_unknown = {
+	.manip_pkt		= unknown_manip_pkt,
+	.in_range		= unknown_in_range,
+	.unique_tuple		= unknown_unique_tuple,
+};
diff --git a/net/netfilter/xt_nat.c b/net/netfilter/xt_nat.c
new file mode 100644
index 000000000000..7521368a6034
--- /dev/null
+++ b/net/netfilter/xt_nat.c
@@ -0,0 +1,167 @@
+/*
+ * (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
+ * (C) 2011 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/x_tables.h>
+#include <net/netfilter/nf_nat_core.h>
+
+static int xt_nat_checkentry_v0(const struct xt_tgchk_param *par)
+{
+	const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo;
+
+	if (mr->rangesize != 1) {
+		pr_info("%s: multiple ranges no longer supported\n",
+			par->target->name);
+		return -EINVAL;
+	}
+	return 0;
+}
+
+static void xt_nat_convert_range(struct nf_nat_range *dst,
+				 const struct nf_nat_ipv4_range *src)
+{
+	memset(&dst->min_addr, 0, sizeof(dst->min_addr));
+	memset(&dst->max_addr, 0, sizeof(dst->max_addr));
+
+	dst->flags	 = src->flags;
+	dst->min_addr.ip = src->min_ip;
+	dst->max_addr.ip = src->max_ip;
+	dst->min_proto	 = src->min;
+	dst->max_proto	 = src->max;
+}
+
+static unsigned int
+xt_snat_target_v0(struct sk_buff *skb, const struct xt_action_param *par)
+{
+	const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo;
+	struct nf_nat_range range;
+	enum ip_conntrack_info ctinfo;
+	struct nf_conn *ct;
+
+	ct = nf_ct_get(skb, &ctinfo);
+	NF_CT_ASSERT(ct != NULL &&
+		     (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
+		      ctinfo == IP_CT_RELATED_REPLY));
+
+	xt_nat_convert_range(&range, &mr->range[0]);
+	return nf_nat_setup_info(ct, &range, NF_NAT_MANIP_SRC);
+}
+
+static unsigned int
+xt_dnat_target_v0(struct sk_buff *skb, const struct xt_action_param *par)
+{
+	const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo;
+	struct nf_nat_range range;
+	enum ip_conntrack_info ctinfo;
+	struct nf_conn *ct;
+
+	ct = nf_ct_get(skb, &ctinfo);
+	NF_CT_ASSERT(ct != NULL &&
+		     (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED));
+
+	xt_nat_convert_range(&range, &mr->range[0]);
+	return nf_nat_setup_info(ct, &range, NF_NAT_MANIP_DST);
+}
+
+static unsigned int
+xt_snat_target_v1(struct sk_buff *skb, const struct xt_action_param *par)
+{
+	const struct nf_nat_range *range = par->targinfo;
+	enum ip_conntrack_info ctinfo;
+	struct nf_conn *ct;
+
+	ct = nf_ct_get(skb, &ctinfo);
+	NF_CT_ASSERT(ct != NULL &&
+		     (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
+		      ctinfo == IP_CT_RELATED_REPLY));
+
+	return nf_nat_setup_info(ct, range, NF_NAT_MANIP_SRC);
+}
+
+static unsigned int
+xt_dnat_target_v1(struct sk_buff *skb, const struct xt_action_param *par)
+{
+	const struct nf_nat_range *range = par->targinfo;
+	enum ip_conntrack_info ctinfo;
+	struct nf_conn *ct;
+
+	ct = nf_ct_get(skb, &ctinfo);
+	NF_CT_ASSERT(ct != NULL &&
+		     (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED));
+
+	return nf_nat_setup_info(ct, range, NF_NAT_MANIP_DST);
+}
+
+static struct xt_target xt_nat_target_reg[] __read_mostly = {
+	{
+		.name		= "SNAT",
+		.revision	= 0,
+		.checkentry	= xt_nat_checkentry_v0,
+		.target		= xt_snat_target_v0,
+		.targetsize	= sizeof(struct nf_nat_ipv4_multi_range_compat),
+		.family		= NFPROTO_IPV4,
+		.table		= "nat",
+		.hooks		= (1 << NF_INET_POST_ROUTING) |
+				  (1 << NF_INET_LOCAL_OUT),
+		.me		= THIS_MODULE,
+	},
+	{
+		.name		= "DNAT",
+		.revision	= 0,
+		.checkentry	= xt_nat_checkentry_v0,
+		.target		= xt_dnat_target_v0,
+		.targetsize	= sizeof(struct nf_nat_ipv4_multi_range_compat),
+		.family		= NFPROTO_IPV4,
+		.table		= "nat",
+		.hooks		= (1 << NF_INET_PRE_ROUTING) |
+				  (1 << NF_INET_LOCAL_IN),
+		.me		= THIS_MODULE,
+	},
+	{
+		.name		= "SNAT",
+		.revision	= 1,
+		.target		= xt_snat_target_v1,
+		.targetsize	= sizeof(struct nf_nat_range),
+		.table		= "nat",
+		.hooks		= (1 << NF_INET_POST_ROUTING) |
+				  (1 << NF_INET_LOCAL_OUT),
+		.me		= THIS_MODULE,
+	},
+	{
+		.name		= "DNAT",
+		.revision	= 1,
+		.target		= xt_dnat_target_v1,
+		.targetsize	= sizeof(struct nf_nat_range),
+		.table		= "nat",
+		.hooks		= (1 << NF_INET_PRE_ROUTING) |
+				  (1 << NF_INET_LOCAL_IN),
+		.me		= THIS_MODULE,
+	},
+};
+
+static int __init xt_nat_init(void)
+{
+	return xt_register_targets(xt_nat_target_reg,
+				   ARRAY_SIZE(xt_nat_target_reg));
+}
+
+static void __exit xt_nat_exit(void)
+{
+	xt_unregister_targets(xt_nat_target_reg, ARRAY_SIZE(xt_nat_target_reg));
+}
+
+module_init(xt_nat_init);
+module_exit(xt_nat_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("ipt_SNAT");
+MODULE_ALIAS("ipt_DNAT");
-- 
cgit v1.2.3


From 2cf545e835aae92173ef0b1f4af385e9c40f21e8 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Sun, 26 Aug 2012 19:14:10 +0200
Subject: net: core: add function for incremental IPv6 pseudo header checksum
 updates

Add inet_proto_csum_replace16 for incrementally updating IPv6 pseudo header
checksums for IPv6 NAT.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Acked-by: David S. Miller <davem@davemloft.net>
---
 include/net/checksum.h |  3 +++
 net/core/utils.c       | 20 ++++++++++++++++++++
 2 files changed, 23 insertions(+)

(limited to 'include')

diff --git a/include/net/checksum.h b/include/net/checksum.h
index ba55d8b8c87c..600d1d705bb8 100644
--- a/include/net/checksum.h
+++ b/include/net/checksum.h
@@ -109,6 +109,9 @@ static inline void csum_replace2(__sum16 *sum, __be16 from, __be16 to)
 struct sk_buff;
 extern void inet_proto_csum_replace4(__sum16 *sum, struct sk_buff *skb,
 				     __be32 from, __be32 to, int pseudohdr);
+extern void inet_proto_csum_replace16(__sum16 *sum, struct sk_buff *skb,
+				      const __be32 *from, const __be32 *to,
+				      int pseudohdr);
 
 static inline void inet_proto_csum_replace2(__sum16 *sum, struct sk_buff *skb,
 					    __be16 from, __be16 to,
diff --git a/net/core/utils.c b/net/core/utils.c
index 39895a65e54a..f5613d569c23 100644
--- a/net/core/utils.c
+++ b/net/core/utils.c
@@ -294,6 +294,26 @@ void inet_proto_csum_replace4(__sum16 *sum, struct sk_buff *skb,
 }
 EXPORT_SYMBOL(inet_proto_csum_replace4);
 
+void inet_proto_csum_replace16(__sum16 *sum, struct sk_buff *skb,
+			       const __be32 *from, const __be32 *to,
+			       int pseudohdr)
+{
+	__be32 diff[] = {
+		~from[0], ~from[1], ~from[2], ~from[3],
+		to[0], to[1], to[2], to[3],
+	};
+	if (skb->ip_summed != CHECKSUM_PARTIAL) {
+		*sum = csum_fold(csum_partial(diff, sizeof(diff),
+				 ~csum_unfold(*sum)));
+		if (skb->ip_summed == CHECKSUM_COMPLETE && pseudohdr)
+			skb->csum = ~csum_partial(diff, sizeof(diff),
+						  ~skb->csum);
+	} else if (pseudohdr)
+		*sum = ~csum_fold(csum_partial(diff, sizeof(diff),
+				  csum_unfold(*sum)));
+}
+EXPORT_SYMBOL(inet_proto_csum_replace16);
+
 int mac_pton(const char *s, u8 *mac)
 {
 	int i;
-- 
cgit v1.2.3


From 58a317f1061c894d2344c0b6a18ab4a64b69b815 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Sun, 26 Aug 2012 19:14:12 +0200
Subject: netfilter: ipv6: add IPv6 NAT support

Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/linux/netfilter/nfnetlink_conntrack.h  |   2 +
 include/net/netfilter/nf_nat_l3proto.h         |   5 +
 include/net/netfilter/nf_nat_l4proto.h         |   1 +
 include/net/netns/ipv6.h                       |   1 +
 net/core/secure_seq.c                          |   1 +
 net/ipv6/netfilter/Kconfig                     |  12 +
 net/ipv6/netfilter/Makefile                    |   4 +
 net/ipv6/netfilter/ip6table_nat.c              | 321 +++++++++++++++++++++++++
 net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c |  37 ++-
 net/ipv6/netfilter/nf_nat_l3proto_ipv6.c       | 287 ++++++++++++++++++++++
 net/ipv6/netfilter/nf_nat_proto_icmpv6.c       |  90 +++++++
 net/netfilter/nf_nat_core.c                    |   2 +
 net/netfilter/xt_nat.c                         |   3 +
 13 files changed, 764 insertions(+), 2 deletions(-)
 create mode 100644 net/ipv6/netfilter/ip6table_nat.c
 create mode 100644 net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
 create mode 100644 net/ipv6/netfilter/nf_nat_proto_icmpv6.c

(limited to 'include')

diff --git a/include/linux/netfilter/nfnetlink_conntrack.h b/include/linux/netfilter/nfnetlink_conntrack.h
index 68920eab287c..43bfe3e1685b 100644
--- a/include/linux/netfilter/nfnetlink_conntrack.h
+++ b/include/linux/netfilter/nfnetlink_conntrack.h
@@ -147,6 +147,8 @@ enum ctattr_nat {
 	CTA_NAT_V4_MAXIP,
 #define CTA_NAT_MAXIP CTA_NAT_V4_MAXIP
 	CTA_NAT_PROTO,
+	CTA_NAT_V6_MINIP,
+	CTA_NAT_V6_MAXIP,
 	__CTA_NAT_MAX
 };
 #define CTA_NAT_MAX (__CTA_NAT_MAX - 1)
diff --git a/include/net/netfilter/nf_nat_l3proto.h b/include/net/netfilter/nf_nat_l3proto.h
index beed96961fa7..bd3b97e02c82 100644
--- a/include/net/netfilter/nf_nat_l3proto.h
+++ b/include/net/netfilter/nf_nat_l3proto.h
@@ -43,5 +43,10 @@ extern int nf_nat_icmp_reply_translation(struct sk_buff *skb,
 					 struct nf_conn *ct,
 					 enum ip_conntrack_info ctinfo,
 					 unsigned int hooknum);
+extern int nf_nat_icmpv6_reply_translation(struct sk_buff *skb,
+					   struct nf_conn *ct,
+					   enum ip_conntrack_info ctinfo,
+					   unsigned int hooknum,
+					   unsigned int hdrlen);
 
 #endif /* _NF_NAT_L3PROTO_H */
diff --git a/include/net/netfilter/nf_nat_l4proto.h b/include/net/netfilter/nf_nat_l4proto.h
index 1f0a4f018fcf..24feb68d1bcc 100644
--- a/include/net/netfilter/nf_nat_l4proto.h
+++ b/include/net/netfilter/nf_nat_l4proto.h
@@ -51,6 +51,7 @@ extern const struct nf_nat_l4proto *__nf_nat_l4proto_find(u8 l3proto, u8 l4proto
 extern const struct nf_nat_l4proto nf_nat_l4proto_tcp;
 extern const struct nf_nat_l4proto nf_nat_l4proto_udp;
 extern const struct nf_nat_l4proto nf_nat_l4proto_icmp;
+extern const struct nf_nat_l4proto nf_nat_l4proto_icmpv6;
 extern const struct nf_nat_l4proto nf_nat_l4proto_unknown;
 
 extern bool nf_nat_l4proto_in_range(const struct nf_conntrack_tuple *tuple,
diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h
index df0a5456a3fd..0318104a9458 100644
--- a/include/net/netns/ipv6.h
+++ b/include/net/netns/ipv6.h
@@ -42,6 +42,7 @@ struct netns_ipv6 {
 #ifdef CONFIG_SECURITY
 	struct xt_table		*ip6table_security;
 #endif
+	struct xt_table		*ip6table_nat;
 #endif
 	struct rt6_info         *ip6_null_entry;
 	struct rt6_statistics   *rt6_stats;
diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c
index 99b2596531bb..e61a8bb7fce7 100644
--- a/net/core/secure_seq.c
+++ b/net/core/secure_seq.c
@@ -76,6 +76,7 @@ u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr,
 
 	return hash[0];
 }
+EXPORT_SYMBOL(secure_ipv6_port_ephemeral);
 #endif
 
 #ifdef CONFIG_INET
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index 10135342799e..b27e0ad4b738 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -25,6 +25,18 @@ config NF_CONNTRACK_IPV6
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
+config NF_NAT_IPV6
+	tristate "IPv6 NAT"
+	depends on NF_CONNTRACK_IPV6
+	depends on NETFILTER_ADVANCED
+	select NF_NAT
+	help
+	  The IPv6 NAT option allows masquerading, port forwarding and other
+	  forms of full Network Address Port Translation. It is controlled by
+	  the `nat' table in ip6tables, see the man page for ip6tables(8).
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
 config IP6_NF_IPTABLES
 	tristate "IP6 tables support (required for filtering)"
 	depends on INET && IPV6
diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile
index 534d3f216f7b..76779376da4c 100644
--- a/net/ipv6/netfilter/Makefile
+++ b/net/ipv6/netfilter/Makefile
@@ -8,6 +8,7 @@ obj-$(CONFIG_IP6_NF_FILTER) += ip6table_filter.o
 obj-$(CONFIG_IP6_NF_MANGLE) += ip6table_mangle.o
 obj-$(CONFIG_IP6_NF_RAW) += ip6table_raw.o
 obj-$(CONFIG_IP6_NF_SECURITY) += ip6table_security.o
+obj-$(CONFIG_NF_NAT_IPV6) += ip6table_nat.o
 
 # objects for l3 independent conntrack
 nf_conntrack_ipv6-y  :=  nf_conntrack_l3proto_ipv6.o nf_conntrack_proto_icmpv6.o
@@ -15,6 +16,9 @@ nf_conntrack_ipv6-y  :=  nf_conntrack_l3proto_ipv6.o nf_conntrack_proto_icmpv6.o
 # l3 independent conntrack
 obj-$(CONFIG_NF_CONNTRACK_IPV6) += nf_conntrack_ipv6.o nf_defrag_ipv6.o
 
+nf_nat_ipv6-y		:= nf_nat_l3proto_ipv6.o nf_nat_proto_icmpv6.o
+obj-$(CONFIG_NF_NAT_IPV6) += nf_nat_ipv6.o
+
 # defrag
 nf_defrag_ipv6-y := nf_defrag_ipv6_hooks.o nf_conntrack_reasm.o
 obj-$(CONFIG_NF_DEFRAG_IPV6) += nf_defrag_ipv6.o
diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c
new file mode 100644
index 000000000000..e418bd6350a4
--- /dev/null
+++ b/net/ipv6/netfilter/ip6table_nat.c
@@ -0,0 +1,321 @@
+/*
+ * Copyright (c) 2011 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Based on Rusty Russell's IPv4 NAT code. Development of IPv6 NAT
+ * funded by Astaro.
+ */
+
+#include <linux/module.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv6.h>
+#include <linux/netfilter_ipv6/ip6_tables.h>
+#include <linux/ipv6.h>
+#include <net/ipv6.h>
+
+#include <net/netfilter/nf_nat.h>
+#include <net/netfilter/nf_nat_core.h>
+#include <net/netfilter/nf_nat_l3proto.h>
+
+static const struct xt_table nf_nat_ipv6_table = {
+	.name		= "nat",
+	.valid_hooks	= (1 << NF_INET_PRE_ROUTING) |
+			  (1 << NF_INET_POST_ROUTING) |
+			  (1 << NF_INET_LOCAL_OUT) |
+			  (1 << NF_INET_LOCAL_IN),
+	.me		= THIS_MODULE,
+	.af		= NFPROTO_IPV6,
+};
+
+static unsigned int alloc_null_binding(struct nf_conn *ct, unsigned int hooknum)
+{
+	/* Force range to this IP; let proto decide mapping for
+	 * per-proto parts (hence not IP_NAT_RANGE_PROTO_SPECIFIED).
+	 */
+	struct nf_nat_range range;
+
+	range.flags = 0;
+	pr_debug("Allocating NULL binding for %p (%pI6)\n", ct,
+		 HOOK2MANIP(hooknum) == NF_NAT_MANIP_SRC ?
+		 &ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip6 :
+		 &ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip6);
+
+	return nf_nat_setup_info(ct, &range, HOOK2MANIP(hooknum));
+}
+
+static unsigned int nf_nat_rule_find(struct sk_buff *skb, unsigned int hooknum,
+				     const struct net_device *in,
+				     const struct net_device *out,
+				     struct nf_conn *ct)
+{
+	struct net *net = nf_ct_net(ct);
+	unsigned int ret;
+
+	ret = ip6t_do_table(skb, hooknum, in, out, net->ipv6.ip6table_nat);
+	if (ret == NF_ACCEPT) {
+		if (!nf_nat_initialized(ct, HOOK2MANIP(hooknum)))
+			ret = alloc_null_binding(ct, hooknum);
+	}
+	return ret;
+}
+
+static unsigned int
+nf_nat_ipv6_fn(unsigned int hooknum,
+	       struct sk_buff *skb,
+	       const struct net_device *in,
+	       const struct net_device *out,
+	       int (*okfn)(struct sk_buff *))
+{
+	struct nf_conn *ct;
+	enum ip_conntrack_info ctinfo;
+	struct nf_conn_nat *nat;
+	enum nf_nat_manip_type maniptype = HOOK2MANIP(hooknum);
+	__be16 frag_off;
+	int hdrlen;
+	u8 nexthdr;
+
+	ct = nf_ct_get(skb, &ctinfo);
+	/* Can't track?  It's not due to stress, or conntrack would
+	 * have dropped it.  Hence it's the user's responsibilty to
+	 * packet filter it out, or implement conntrack/NAT for that
+	 * protocol. 8) --RR
+	 */
+	if (!ct)
+		return NF_ACCEPT;
+
+	/* Don't try to NAT if this packet is not conntracked */
+	if (nf_ct_is_untracked(ct))
+		return NF_ACCEPT;
+
+	nat = nfct_nat(ct);
+	if (!nat) {
+		/* NAT module was loaded late. */
+		if (nf_ct_is_confirmed(ct))
+			return NF_ACCEPT;
+		nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC);
+		if (nat == NULL) {
+			pr_debug("failed to add NAT extension\n");
+			return NF_ACCEPT;
+		}
+	}
+
+	switch (ctinfo) {
+	case IP_CT_RELATED:
+	case IP_CT_RELATED_REPLY:
+		nexthdr = ipv6_hdr(skb)->nexthdr;
+		hdrlen = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr),
+					  &nexthdr, &frag_off);
+
+		if (hdrlen >= 0 && nexthdr == IPPROTO_ICMPV6) {
+			if (!nf_nat_icmpv6_reply_translation(skb, ct, ctinfo,
+							     hooknum, hdrlen))
+				return NF_DROP;
+			else
+				return NF_ACCEPT;
+		}
+		/* Fall thru... (Only ICMPs can be IP_CT_IS_REPLY) */
+	case IP_CT_NEW:
+		/* Seen it before?  This can happen for loopback, retrans,
+		 * or local packets.
+		 */
+		if (!nf_nat_initialized(ct, maniptype)) {
+			unsigned int ret;
+
+			ret = nf_nat_rule_find(skb, hooknum, in, out, ct);
+			if (ret != NF_ACCEPT)
+				return ret;
+		} else
+			pr_debug("Already setup manip %s for ct %p\n",
+				 maniptype == NF_NAT_MANIP_SRC ? "SRC" : "DST",
+				 ct);
+		break;
+
+	default:
+		/* ESTABLISHED */
+		NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED ||
+			     ctinfo == IP_CT_ESTABLISHED_REPLY);
+	}
+
+	return nf_nat_packet(ct, ctinfo, hooknum, skb);
+}
+
+static unsigned int
+nf_nat_ipv6_in(unsigned int hooknum,
+	       struct sk_buff *skb,
+	       const struct net_device *in,
+	       const struct net_device *out,
+	       int (*okfn)(struct sk_buff *))
+{
+	unsigned int ret;
+	struct in6_addr daddr = ipv6_hdr(skb)->daddr;
+
+	ret = nf_nat_ipv6_fn(hooknum, skb, in, out, okfn);
+	if (ret != NF_DROP && ret != NF_STOLEN &&
+	    ipv6_addr_cmp(&daddr, &ipv6_hdr(skb)->daddr))
+		skb_dst_drop(skb);
+
+	return ret;
+}
+
+static unsigned int
+nf_nat_ipv6_out(unsigned int hooknum,
+		struct sk_buff *skb,
+		const struct net_device *in,
+		const struct net_device *out,
+		int (*okfn)(struct sk_buff *))
+{
+#ifdef CONFIG_XFRM
+	const struct nf_conn *ct;
+	enum ip_conntrack_info ctinfo;
+#endif
+	unsigned int ret;
+
+	/* root is playing with raw sockets. */
+	if (skb->len < sizeof(struct ipv6hdr))
+		return NF_ACCEPT;
+
+	ret = nf_nat_ipv6_fn(hooknum, skb, in, out, okfn);
+#ifdef CONFIG_XFRM
+	if (ret != NF_DROP && ret != NF_STOLEN &&
+	    !(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) &&
+	    (ct = nf_ct_get(skb, &ctinfo)) != NULL) {
+		enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+
+		if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.src.u3,
+				      &ct->tuplehash[!dir].tuple.dst.u3) ||
+		    (ct->tuplehash[dir].tuple.src.u.all !=
+		     ct->tuplehash[!dir].tuple.dst.u.all))
+			if (nf_xfrm_me_harder(skb, AF_INET6) < 0)
+				ret = NF_DROP;
+	}
+#endif
+	return ret;
+}
+
+static unsigned int
+nf_nat_ipv6_local_fn(unsigned int hooknum,
+		     struct sk_buff *skb,
+		     const struct net_device *in,
+		     const struct net_device *out,
+		     int (*okfn)(struct sk_buff *))
+{
+	const struct nf_conn *ct;
+	enum ip_conntrack_info ctinfo;
+	unsigned int ret;
+
+	/* root is playing with raw sockets. */
+	if (skb->len < sizeof(struct ipv6hdr))
+		return NF_ACCEPT;
+
+	ret = nf_nat_ipv6_fn(hooknum, skb, in, out, okfn);
+	if (ret != NF_DROP && ret != NF_STOLEN &&
+	    (ct = nf_ct_get(skb, &ctinfo)) != NULL) {
+		enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+
+		if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.dst.u3,
+				      &ct->tuplehash[!dir].tuple.src.u3)) {
+			if (ip6_route_me_harder(skb))
+				ret = NF_DROP;
+		}
+#ifdef CONFIG_XFRM
+		else if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) &&
+			 ct->tuplehash[dir].tuple.dst.u.all !=
+			 ct->tuplehash[!dir].tuple.src.u.all)
+			if (nf_xfrm_me_harder(skb, AF_INET6))
+				ret = NF_DROP;
+#endif
+	}
+	return ret;
+}
+
+static struct nf_hook_ops nf_nat_ipv6_ops[] __read_mostly = {
+	/* Before packet filtering, change destination */
+	{
+		.hook		= nf_nat_ipv6_in,
+		.owner		= THIS_MODULE,
+		.pf		= NFPROTO_IPV6,
+		.hooknum	= NF_INET_PRE_ROUTING,
+		.priority	= NF_IP6_PRI_NAT_DST,
+	},
+	/* After packet filtering, change source */
+	{
+		.hook		= nf_nat_ipv6_out,
+		.owner		= THIS_MODULE,
+		.pf		= NFPROTO_IPV6,
+		.hooknum	= NF_INET_POST_ROUTING,
+		.priority	= NF_IP6_PRI_NAT_SRC,
+	},
+	/* Before packet filtering, change destination */
+	{
+		.hook		= nf_nat_ipv6_local_fn,
+		.owner		= THIS_MODULE,
+		.pf		= NFPROTO_IPV6,
+		.hooknum	= NF_INET_LOCAL_OUT,
+		.priority	= NF_IP6_PRI_NAT_DST,
+	},
+	/* After packet filtering, change source */
+	{
+		.hook		= nf_nat_ipv6_fn,
+		.owner		= THIS_MODULE,
+		.pf		= NFPROTO_IPV6,
+		.hooknum	= NF_INET_LOCAL_IN,
+		.priority	= NF_IP6_PRI_NAT_SRC,
+	},
+};
+
+static int __net_init ip6table_nat_net_init(struct net *net)
+{
+	struct ip6t_replace *repl;
+
+	repl = ip6t_alloc_initial_table(&nf_nat_ipv6_table);
+	if (repl == NULL)
+		return -ENOMEM;
+	net->ipv6.ip6table_nat = ip6t_register_table(net, &nf_nat_ipv6_table, repl);
+	kfree(repl);
+	if (IS_ERR(net->ipv6.ip6table_nat))
+		return PTR_ERR(net->ipv6.ip6table_nat);
+	return 0;
+}
+
+static void __net_exit ip6table_nat_net_exit(struct net *net)
+{
+	ip6t_unregister_table(net, net->ipv6.ip6table_nat);
+}
+
+static struct pernet_operations ip6table_nat_net_ops = {
+	.init	= ip6table_nat_net_init,
+	.exit	= ip6table_nat_net_exit,
+};
+
+static int __init ip6table_nat_init(void)
+{
+	int err;
+
+	err = register_pernet_subsys(&ip6table_nat_net_ops);
+	if (err < 0)
+		goto err1;
+
+	err = nf_register_hooks(nf_nat_ipv6_ops, ARRAY_SIZE(nf_nat_ipv6_ops));
+	if (err < 0)
+		goto err2;
+	return 0;
+
+err2:
+	unregister_pernet_subsys(&ip6table_nat_net_ops);
+err1:
+	return err;
+}
+
+static void __exit ip6table_nat_exit(void)
+{
+	nf_unregister_hooks(nf_nat_ipv6_ops, ARRAY_SIZE(nf_nat_ipv6_ops));
+	unregister_pernet_subsys(&ip6table_nat_net_ops);
+}
+
+module_init(ip6table_nat_init);
+module_exit(ip6table_nat_exit);
+
+MODULE_LICENSE("GPL");
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index dcf6010f68d9..8860d23e61cf 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -28,6 +28,7 @@
 #include <net/netfilter/nf_conntrack_core.h>
 #include <net/netfilter/nf_conntrack_zones.h>
 #include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
+#include <net/netfilter/nf_nat_helper.h>
 #include <net/netfilter/ipv6/nf_defrag_ipv6.h>
 #include <net/netfilter/nf_log.h>
 
@@ -142,6 +143,36 @@ static unsigned int ipv6_confirm(unsigned int hooknum,
 				 const struct net_device *out,
 				 int (*okfn)(struct sk_buff *))
 {
+	struct nf_conn *ct;
+	enum ip_conntrack_info ctinfo;
+	unsigned char pnum = ipv6_hdr(skb)->nexthdr;
+	int protoff;
+	__be16 frag_off;
+
+	ct = nf_ct_get(skb, &ctinfo);
+	if (!ct || ctinfo == IP_CT_RELATED_REPLY)
+		goto out;
+
+	protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &pnum,
+				   &frag_off);
+	if (protoff < 0 || (frag_off & htons(~0x7)) != 0) {
+		pr_debug("proto header not found\n");
+		goto out;
+	}
+
+	/* adjust seqs for loopback traffic only in outgoing direction */
+	if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) &&
+	    !nf_is_loopback_packet(skb)) {
+		typeof(nf_nat_seq_adjust_hook) seq_adjust;
+
+		seq_adjust = rcu_dereference(nf_nat_seq_adjust_hook);
+		if (!seq_adjust ||
+		    !seq_adjust(skb, ct, ctinfo, protoff)) {
+			NF_CT_STAT_INC_ATOMIC(nf_ct_net(ct), drop);
+			return NF_DROP;
+		}
+	}
+out:
 	/* We've seen it coming out the other side: confirm it */
 	return nf_conntrack_confirm(skb);
 }
@@ -170,12 +201,14 @@ static unsigned int __ipv6_conntrack_in(struct net *net,
 		}
 
 		/* Conntrack helpers need the entire reassembled packet in the
-		 * POST_ROUTING hook.
+		 * POST_ROUTING hook. In case of unconfirmed connections NAT
+		 * might reassign a helper, so the entire packet is also
+		 * required.
 		 */
 		ct = nf_ct_get(reasm, &ctinfo);
 		if (ct != NULL && !nf_ct_is_untracked(ct)) {
 			help = nfct_help(ct);
-			if (help && help->helper) {
+			if ((help && help->helper) || !nf_ct_is_confirmed(ct)) {
 				nf_conntrack_get_reasm(skb);
 				NF_HOOK_THRESH(NFPROTO_IPV6, hooknum, reasm,
 					       (struct net_device *)in,
diff --git a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
new file mode 100644
index 000000000000..81a2d1c3da8e
--- /dev/null
+++ b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
@@ -0,0 +1,287 @@
+/*
+ * Copyright (c) 2011 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of IPv6 NAT funded by Astaro.
+ */
+#include <linux/types.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/ipv6.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv6.h>
+#include <net/secure_seq.h>
+#include <net/checksum.h>
+#include <net/ip6_route.h>
+#include <net/ipv6.h>
+
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_nat_core.h>
+#include <net/netfilter/nf_nat_l3proto.h>
+#include <net/netfilter/nf_nat_l4proto.h>
+
+static const struct nf_nat_l3proto nf_nat_l3proto_ipv6;
+
+#ifdef CONFIG_XFRM
+static void nf_nat_ipv6_decode_session(struct sk_buff *skb,
+				       const struct nf_conn *ct,
+				       enum ip_conntrack_dir dir,
+				       unsigned long statusbit,
+				       struct flowi *fl)
+{
+	const struct nf_conntrack_tuple *t = &ct->tuplehash[dir].tuple;
+	struct flowi6 *fl6 = &fl->u.ip6;
+
+	if (ct->status & statusbit) {
+		fl6->daddr = t->dst.u3.in6;
+		if (t->dst.protonum == IPPROTO_TCP ||
+		    t->dst.protonum == IPPROTO_UDP ||
+		    t->dst.protonum == IPPROTO_UDPLITE ||
+		    t->dst.protonum == IPPROTO_DCCP ||
+		    t->dst.protonum == IPPROTO_SCTP)
+			fl6->fl6_dport = t->dst.u.all;
+	}
+
+	statusbit ^= IPS_NAT_MASK;
+
+	if (ct->status & statusbit) {
+		fl6->saddr = t->src.u3.in6;
+		if (t->dst.protonum == IPPROTO_TCP ||
+		    t->dst.protonum == IPPROTO_UDP ||
+		    t->dst.protonum == IPPROTO_UDPLITE ||
+		    t->dst.protonum == IPPROTO_DCCP ||
+		    t->dst.protonum == IPPROTO_SCTP)
+			fl6->fl6_sport = t->src.u.all;
+	}
+}
+#endif
+
+static bool nf_nat_ipv6_in_range(const struct nf_conntrack_tuple *t,
+				 const struct nf_nat_range *range)
+{
+	return ipv6_addr_cmp(&t->src.u3.in6, &range->min_addr.in6) >= 0 &&
+	       ipv6_addr_cmp(&t->src.u3.in6, &range->max_addr.in6) <= 0;
+}
+
+static u32 nf_nat_ipv6_secure_port(const struct nf_conntrack_tuple *t,
+				   __be16 dport)
+{
+	return secure_ipv6_port_ephemeral(t->src.u3.ip6, t->dst.u3.ip6, dport);
+}
+
+static bool nf_nat_ipv6_manip_pkt(struct sk_buff *skb,
+				  unsigned int iphdroff,
+				  const struct nf_nat_l4proto *l4proto,
+				  const struct nf_conntrack_tuple *target,
+				  enum nf_nat_manip_type maniptype)
+{
+	struct ipv6hdr *ipv6h;
+	__be16 frag_off;
+	int hdroff;
+	u8 nexthdr;
+
+	if (!skb_make_writable(skb, iphdroff + sizeof(*ipv6h)))
+		return false;
+
+	ipv6h = (void *)skb->data + iphdroff;
+	nexthdr = ipv6h->nexthdr;
+	hdroff = ipv6_skip_exthdr(skb, iphdroff + sizeof(*ipv6h),
+				  &nexthdr, &frag_off);
+	if (hdroff < 0)
+		goto manip_addr;
+
+	if ((frag_off & htons(~0x7)) == 0 &&
+	    !l4proto->manip_pkt(skb, &nf_nat_l3proto_ipv6, iphdroff, hdroff,
+				target, maniptype))
+		return false;
+manip_addr:
+	if (maniptype == NF_NAT_MANIP_SRC)
+		ipv6h->saddr = target->src.u3.in6;
+	else
+		ipv6h->daddr = target->dst.u3.in6;
+
+	return true;
+}
+
+static void nf_nat_ipv6_csum_update(struct sk_buff *skb,
+				    unsigned int iphdroff, __sum16 *check,
+				    const struct nf_conntrack_tuple *t,
+				    enum nf_nat_manip_type maniptype)
+{
+	const struct ipv6hdr *ipv6h = (struct ipv6hdr *)(skb->data + iphdroff);
+	const struct in6_addr *oldip, *newip;
+
+	if (maniptype == NF_NAT_MANIP_SRC) {
+		oldip = &ipv6h->saddr;
+		newip = &t->src.u3.in6;
+	} else {
+		oldip = &ipv6h->daddr;
+		newip = &t->dst.u3.in6;
+	}
+	inet_proto_csum_replace16(check, skb, oldip->s6_addr32,
+				  newip->s6_addr32, 1);
+}
+
+static void nf_nat_ipv6_csum_recalc(struct sk_buff *skb,
+				    u8 proto, void *data, __sum16 *check,
+				    int datalen, int oldlen)
+{
+	const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
+	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
+
+	if (skb->ip_summed != CHECKSUM_PARTIAL) {
+		if (!(rt->rt6i_flags & RTF_LOCAL) &&
+		    (!skb->dev || skb->dev->features & NETIF_F_V6_CSUM)) {
+			skb->ip_summed = CHECKSUM_PARTIAL;
+			skb->csum_start = skb_headroom(skb) +
+					  skb_network_offset(skb) +
+					  (data - (void *)skb->data);
+			skb->csum_offset = (void *)check - data;
+			*check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr,
+						  datalen, proto, 0);
+		} else {
+			*check = 0;
+			*check = csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr,
+						 datalen, proto,
+						 csum_partial(data, datalen,
+							      0));
+			if (proto == IPPROTO_UDP && !*check)
+				*check = CSUM_MANGLED_0;
+		}
+	} else
+		inet_proto_csum_replace2(check, skb,
+					 htons(oldlen), htons(datalen), 1);
+}
+
+static int nf_nat_ipv6_nlattr_to_range(struct nlattr *tb[],
+				       struct nf_nat_range *range)
+{
+	if (tb[CTA_NAT_V6_MINIP]) {
+		nla_memcpy(&range->min_addr.ip6, tb[CTA_NAT_V6_MINIP],
+			   sizeof(struct in6_addr));
+		range->flags |= NF_NAT_RANGE_MAP_IPS;
+	}
+
+	if (tb[CTA_NAT_V6_MAXIP])
+		nla_memcpy(&range->max_addr.ip6, tb[CTA_NAT_V6_MAXIP],
+			   sizeof(struct in6_addr));
+	else
+		range->max_addr = range->min_addr;
+
+	return 0;
+}
+
+static const struct nf_nat_l3proto nf_nat_l3proto_ipv6 = {
+	.l3proto		= NFPROTO_IPV6,
+	.secure_port		= nf_nat_ipv6_secure_port,
+	.in_range		= nf_nat_ipv6_in_range,
+	.manip_pkt		= nf_nat_ipv6_manip_pkt,
+	.csum_update		= nf_nat_ipv6_csum_update,
+	.csum_recalc		= nf_nat_ipv6_csum_recalc,
+	.nlattr_to_range	= nf_nat_ipv6_nlattr_to_range,
+#ifdef CONFIG_XFRM
+	.decode_session	= nf_nat_ipv6_decode_session,
+#endif
+};
+
+int nf_nat_icmpv6_reply_translation(struct sk_buff *skb,
+				    struct nf_conn *ct,
+				    enum ip_conntrack_info ctinfo,
+				    unsigned int hooknum,
+				    unsigned int hdrlen)
+{
+	struct {
+		struct icmp6hdr	icmp6;
+		struct ipv6hdr	ip6;
+	} *inside;
+	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+	enum nf_nat_manip_type manip = HOOK2MANIP(hooknum);
+	const struct nf_nat_l4proto *l4proto;
+	struct nf_conntrack_tuple target;
+	unsigned long statusbit;
+
+	NF_CT_ASSERT(ctinfo == IP_CT_RELATED || ctinfo == IP_CT_RELATED_REPLY);
+
+	if (!skb_make_writable(skb, hdrlen + sizeof(*inside)))
+		return 0;
+	if (nf_ip6_checksum(skb, hooknum, hdrlen, IPPROTO_ICMPV6))
+		return 0;
+
+	inside = (void *)skb->data + hdrlen;
+	if (inside->icmp6.icmp6_type == NDISC_REDIRECT) {
+		if ((ct->status & IPS_NAT_DONE_MASK) != IPS_NAT_DONE_MASK)
+			return 0;
+		if (ct->status & IPS_NAT_MASK)
+			return 0;
+	}
+
+	if (manip == NF_NAT_MANIP_SRC)
+		statusbit = IPS_SRC_NAT;
+	else
+		statusbit = IPS_DST_NAT;
+
+	/* Invert if this is reply direction */
+	if (dir == IP_CT_DIR_REPLY)
+		statusbit ^= IPS_NAT_MASK;
+
+	if (!(ct->status & statusbit))
+		return 1;
+
+	l4proto = __nf_nat_l4proto_find(NFPROTO_IPV6, inside->ip6.nexthdr);
+	if (!nf_nat_ipv6_manip_pkt(skb, hdrlen + sizeof(inside->icmp6),
+				   l4proto, &ct->tuplehash[!dir].tuple, !manip))
+		return 0;
+
+	if (skb->ip_summed != CHECKSUM_PARTIAL) {
+		struct ipv6hdr *ipv6h = ipv6_hdr(skb);
+		inside = (void *)skb->data + hdrlen;
+		inside->icmp6.icmp6_cksum = 0;
+		inside->icmp6.icmp6_cksum =
+			csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr,
+					skb->len - hdrlen, IPPROTO_ICMPV6,
+					csum_partial(&inside->icmp6,
+						     skb->len - hdrlen, 0));
+	}
+
+	nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple);
+	l4proto = __nf_nat_l4proto_find(NFPROTO_IPV6, IPPROTO_ICMPV6);
+	if (!nf_nat_ipv6_manip_pkt(skb, 0, l4proto, &target, manip))
+		return 0;
+
+	return 1;
+}
+EXPORT_SYMBOL_GPL(nf_nat_icmpv6_reply_translation);
+
+static int __init nf_nat_l3proto_ipv6_init(void)
+{
+	int err;
+
+	err = nf_nat_l4proto_register(NFPROTO_IPV6, &nf_nat_l4proto_icmpv6);
+	if (err < 0)
+		goto err1;
+	err = nf_nat_l3proto_register(&nf_nat_l3proto_ipv6);
+	if (err < 0)
+		goto err2;
+	return err;
+
+err2:
+	nf_nat_l4proto_unregister(NFPROTO_IPV6, &nf_nat_l4proto_icmpv6);
+err1:
+	return err;
+}
+
+static void __exit nf_nat_l3proto_ipv6_exit(void)
+{
+	nf_nat_l3proto_unregister(&nf_nat_l3proto_ipv6);
+	nf_nat_l4proto_unregister(NFPROTO_IPV6, &nf_nat_l4proto_icmpv6);
+}
+
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("nf-nat-" __stringify(AF_INET6));
+
+module_init(nf_nat_l3proto_ipv6_init);
+module_exit(nf_nat_l3proto_ipv6_exit);
diff --git a/net/ipv6/netfilter/nf_nat_proto_icmpv6.c b/net/ipv6/netfilter/nf_nat_proto_icmpv6.c
new file mode 100644
index 000000000000..5d6da784305b
--- /dev/null
+++ b/net/ipv6/netfilter/nf_nat_proto_icmpv6.c
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2011 Patrick Mchardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Based on Rusty Russell's IPv4 ICMP NAT code. Development of IPv6
+ * NAT funded by Astaro.
+ */
+
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/icmpv6.h>
+
+#include <linux/netfilter.h>
+#include <net/netfilter/nf_nat.h>
+#include <net/netfilter/nf_nat_core.h>
+#include <net/netfilter/nf_nat_l3proto.h>
+#include <net/netfilter/nf_nat_l4proto.h>
+
+static bool
+icmpv6_in_range(const struct nf_conntrack_tuple *tuple,
+		enum nf_nat_manip_type maniptype,
+		const union nf_conntrack_man_proto *min,
+		const union nf_conntrack_man_proto *max)
+{
+	return ntohs(tuple->src.u.icmp.id) >= ntohs(min->icmp.id) &&
+	       ntohs(tuple->src.u.icmp.id) <= ntohs(max->icmp.id);
+}
+
+static void
+icmpv6_unique_tuple(const struct nf_nat_l3proto *l3proto,
+		    struct nf_conntrack_tuple *tuple,
+		    const struct nf_nat_range *range,
+		    enum nf_nat_manip_type maniptype,
+		    const struct nf_conn *ct)
+{
+	static u16 id;
+	unsigned int range_size;
+	unsigned int i;
+
+	range_size = ntohs(range->max_proto.icmp.id) -
+		     ntohs(range->min_proto.icmp.id) + 1;
+
+	if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED))
+		range_size = 0xffff;
+
+	for (i = 0; ; ++id) {
+		tuple->src.u.icmp.id = htons(ntohs(range->min_proto.icmp.id) +
+					     (id % range_size));
+		if (++i == range_size || !nf_nat_used_tuple(tuple, ct))
+			return;
+	}
+}
+
+static bool
+icmpv6_manip_pkt(struct sk_buff *skb,
+		 const struct nf_nat_l3proto *l3proto,
+		 unsigned int iphdroff, unsigned int hdroff,
+		 const struct nf_conntrack_tuple *tuple,
+		 enum nf_nat_manip_type maniptype)
+{
+	struct icmp6hdr *hdr;
+
+	if (!skb_make_writable(skb, hdroff + sizeof(*hdr)))
+		return false;
+
+	hdr = (struct icmp6hdr *)(skb->data + hdroff);
+	l3proto->csum_update(skb, iphdroff, &hdr->icmp6_cksum,
+			     tuple, maniptype);
+	if (hdr->icmp6_code == ICMPV6_ECHO_REQUEST ||
+	    hdr->icmp6_code == ICMPV6_ECHO_REPLY) {
+		inet_proto_csum_replace2(&hdr->icmp6_cksum, skb,
+					 hdr->icmp6_identifier,
+					 tuple->src.u.icmp.id, 0);
+		hdr->icmp6_identifier = tuple->src.u.icmp.id;
+	}
+	return true;
+}
+
+const struct nf_nat_l4proto nf_nat_l4proto_icmpv6 = {
+	.l4proto		= IPPROTO_ICMPV6,
+	.manip_pkt		= icmpv6_manip_pkt,
+	.in_range		= icmpv6_in_range,
+	.unique_tuple		= icmpv6_unique_tuple,
+#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
+	.nlattr_to_range	= nf_nat_l4proto_nlattr_to_range,
+#endif
+};
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index c577b753fb9a..29d445235199 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -696,6 +696,8 @@ static int nfnetlink_parse_nat_proto(struct nlattr *attr,
 static const struct nla_policy nat_nla_policy[CTA_NAT_MAX+1] = {
 	[CTA_NAT_V4_MINIP]	= { .type = NLA_U32 },
 	[CTA_NAT_V4_MAXIP]	= { .type = NLA_U32 },
+	[CTA_NAT_V6_MINIP]	= { .len = sizeof(struct in6_addr) },
+	[CTA_NAT_V6_MAXIP]	= { .len = sizeof(struct in6_addr) },
 	[CTA_NAT_PROTO]		= { .type = NLA_NESTED },
 };
 
diff --git a/net/netfilter/xt_nat.c b/net/netfilter/xt_nat.c
index 7521368a6034..81aafa8e4fef 100644
--- a/net/netfilter/xt_nat.c
+++ b/net/netfilter/xt_nat.c
@@ -163,5 +163,8 @@ module_init(xt_nat_init);
 module_exit(xt_nat_exit);
 
 MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
 MODULE_ALIAS("ipt_SNAT");
 MODULE_ALIAS("ipt_DNAT");
+MODULE_ALIAS("ip6t_SNAT");
+MODULE_ALIAS("ip6t_DNAT");
-- 
cgit v1.2.3


From b3f644fc8232ca761da0b5c5ccb6f30b423c4302 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Sun, 26 Aug 2012 19:14:14 +0200
Subject: netfilter: ip6tables: add MASQUERADE target

Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/net/addrconf.h               |   2 +-
 include/net/netfilter/nf_nat.h       |   4 +-
 net/ipv4/netfilter/ipt_MASQUERADE.c  |   3 +-
 net/ipv6/addrconf.c                  |   2 +-
 net/ipv6/netfilter/Kconfig           |  12 ++++
 net/ipv6/netfilter/Makefile          |   1 +
 net/ipv6/netfilter/ip6t_MASQUERADE.c | 135 +++++++++++++++++++++++++++++++++++
 7 files changed, 155 insertions(+), 4 deletions(-)
 create mode 100644 net/ipv6/netfilter/ip6t_MASQUERADE.c

(limited to 'include')

diff --git a/include/net/addrconf.h b/include/net/addrconf.h
index 089a09d001d1..9e63e76b20e7 100644
--- a/include/net/addrconf.h
+++ b/include/net/addrconf.h
@@ -78,7 +78,7 @@ extern struct inet6_ifaddr      *ipv6_get_ifaddr(struct net *net,
 						 int strict);
 
 extern int			ipv6_dev_get_saddr(struct net *net,
-					       struct net_device *dev,
+					       const struct net_device *dev,
 					       const struct in6_addr *daddr,
 					       unsigned int srcprefs,
 					       struct in6_addr *saddr);
diff --git a/include/net/netfilter/nf_nat.h b/include/net/netfilter/nf_nat.h
index 1752f1339054..bd8eea720f2e 100644
--- a/include/net/netfilter/nf_nat.h
+++ b/include/net/netfilter/nf_nat.h
@@ -43,7 +43,9 @@ struct nf_conn_nat {
 	struct nf_conn *ct;
 	union nf_conntrack_nat_help help;
 #if defined(CONFIG_IP_NF_TARGET_MASQUERADE) || \
-    defined(CONFIG_IP_NF_TARGET_MASQUERADE_MODULE)
+    defined(CONFIG_IP_NF_TARGET_MASQUERADE_MODULE) || \
+    defined(CONFIG_IP6_NF_TARGET_MASQUERADE) || \
+    defined(CONFIG_IP6_NF_TARGET_MASQUERADE_MODULE)
 	int masq_index;
 #endif
 };
diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c
index 1c3aa28b51ae..5d5d4d1be9c2 100644
--- a/net/ipv4/netfilter/ipt_MASQUERADE.c
+++ b/net/ipv4/netfilter/ipt_MASQUERADE.c
@@ -99,7 +99,8 @@ device_cmp(struct nf_conn *i, void *ifindex)
 
 	if (!nat)
 		return 0;
-
+	if (nf_ct_l3num(i) != NFPROTO_IPV4)
+		return 0;
 	return nat->masq_index == (int)(long)ifindex;
 }
 
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index e581009cb09e..19d4bffda9d7 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -1093,7 +1093,7 @@ out:
 	return ret;
 }
 
-int ipv6_dev_get_saddr(struct net *net, struct net_device *dst_dev,
+int ipv6_dev_get_saddr(struct net *net, const struct net_device *dst_dev,
 		       const struct in6_addr *daddr, unsigned int prefs,
 		       struct in6_addr *saddr)
 {
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index b27e0ad4b738..54a5032ea3df 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -144,6 +144,18 @@ config IP6_NF_TARGET_HL
 	(e.g. when running oldconfig). It selects
 	CONFIG_NETFILTER_XT_TARGET_HL.
 
+config IP6_NF_TARGET_MASQUERADE
+	tristate "MASQUERADE target support"
+	depends on NF_NAT_IPV6
+	help
+	  Masquerading is a special case of NAT: all outgoing connections are
+	  changed to seem to come from a particular interface's address, and
+	  if the interface goes down, those connections are lost.  This is
+	  only useful for dialup accounts with dynamic IP address (ie. your IP
+	  address will be different on next dialup).
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
 config IP6_NF_FILTER
 	tristate "Packet filtering"
 	default m if NETFILTER_ADVANCED=n
diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile
index 76779376da4c..068bad199587 100644
--- a/net/ipv6/netfilter/Makefile
+++ b/net/ipv6/netfilter/Makefile
@@ -34,4 +34,5 @@ obj-$(CONFIG_IP6_NF_MATCH_RPFILTER) += ip6t_rpfilter.o
 obj-$(CONFIG_IP6_NF_MATCH_RT) += ip6t_rt.o
 
 # targets
+obj-$(CONFIG_IP6_NF_TARGET_MASQUERADE) += ip6t_MASQUERADE.o
 obj-$(CONFIG_IP6_NF_TARGET_REJECT) += ip6t_REJECT.o
diff --git a/net/ipv6/netfilter/ip6t_MASQUERADE.c b/net/ipv6/netfilter/ip6t_MASQUERADE.c
new file mode 100644
index 000000000000..60e9053bab05
--- /dev/null
+++ b/net/ipv6/netfilter/ip6t_MASQUERADE.c
@@ -0,0 +1,135 @@
+/*
+ * Copyright (c) 2011 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Based on Rusty Russell's IPv6 MASQUERADE target. Development of IPv6
+ * NAT funded by Astaro.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/netdevice.h>
+#include <linux/ipv6.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv6.h>
+#include <linux/netfilter/x_tables.h>
+#include <net/netfilter/nf_nat.h>
+#include <net/addrconf.h>
+#include <net/ipv6.h>
+
+static unsigned int
+masquerade_tg6(struct sk_buff *skb, const struct xt_action_param *par)
+{
+	const struct nf_nat_range *range = par->targinfo;
+	enum ip_conntrack_info ctinfo;
+	struct in6_addr src;
+	struct nf_conn *ct;
+	struct nf_nat_range newrange;
+
+	ct = nf_ct_get(skb, &ctinfo);
+	NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
+			    ctinfo == IP_CT_RELATED_REPLY));
+
+	if (ipv6_dev_get_saddr(dev_net(par->out), par->out,
+			       &ipv6_hdr(skb)->daddr, 0, &src) < 0)
+		return NF_DROP;
+
+	nfct_nat(ct)->masq_index = par->out->ifindex;
+
+	newrange.flags		= range->flags | NF_NAT_RANGE_MAP_IPS;
+	newrange.min_addr.in6	= src;
+	newrange.max_addr.in6	= src;
+	newrange.min_proto	= range->min_proto;
+	newrange.max_proto	= range->max_proto;
+
+	return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_SRC);
+}
+
+static int masquerade_tg6_checkentry(const struct xt_tgchk_param *par)
+{
+	const struct nf_nat_range *range = par->targinfo;
+
+	if (range->flags & NF_NAT_RANGE_MAP_IPS)
+		return -EINVAL;
+	return 0;
+}
+
+static int device_cmp(struct nf_conn *ct, void *ifindex)
+{
+	const struct nf_conn_nat *nat = nfct_nat(ct);
+
+	if (!nat)
+		return 0;
+	if (nf_ct_l3num(ct) != NFPROTO_IPV6)
+		return 0;
+	return nat->masq_index == (int)(long)ifindex;
+}
+
+static int masq_device_event(struct notifier_block *this,
+			     unsigned long event, void *ptr)
+{
+	const struct net_device *dev = ptr;
+	struct net *net = dev_net(dev);
+
+	if (event == NETDEV_DOWN)
+		nf_ct_iterate_cleanup(net, device_cmp,
+				      (void *)(long)dev->ifindex);
+
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block masq_dev_notifier = {
+	.notifier_call	= masq_device_event,
+};
+
+static int masq_inet_event(struct notifier_block *this,
+			   unsigned long event, void *ptr)
+{
+	struct inet6_ifaddr *ifa = ptr;
+
+	return masq_device_event(this, event, ifa->idev->dev);
+}
+
+static struct notifier_block masq_inet_notifier = {
+	.notifier_call	= masq_inet_event,
+};
+
+static struct xt_target masquerade_tg6_reg __read_mostly = {
+	.name		= "MASQUERADE",
+	.family		= NFPROTO_IPV6,
+	.checkentry	= masquerade_tg6_checkentry,
+	.target		= masquerade_tg6,
+	.targetsize	= sizeof(struct nf_nat_range),
+	.table		= "nat",
+	.hooks		= 1 << NF_INET_POST_ROUTING,
+	.me		= THIS_MODULE,
+};
+
+static int __init masquerade_tg6_init(void)
+{
+	int err;
+
+	err = xt_register_target(&masquerade_tg6_reg);
+	if (err == 0) {
+		register_netdevice_notifier(&masq_dev_notifier);
+		register_inet6addr_notifier(&masq_inet_notifier);
+	}
+
+	return err;
+}
+static void __exit masquerade_tg6_exit(void)
+{
+	unregister_inet6addr_notifier(&masq_inet_notifier);
+	unregister_netdevice_notifier(&masq_dev_notifier);
+	xt_unregister_target(&masquerade_tg6_reg);
+}
+
+module_init(masquerade_tg6_init);
+module_exit(masquerade_tg6_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_DESCRIPTION("Xtables: automatic address SNAT");
-- 
cgit v1.2.3


From 9a664821068739dbc8eac13770e28167b46a0c0f Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Sun, 26 Aug 2012 19:14:25 +0200
Subject: netfilter: nf_nat: support IPv6 in SIP NAT helper

Add IPv6 support to the SIP NAT helper. There are no functional differences
to IPv4 NAT, just different formats for addresses.

Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/linux/netfilter/nf_conntrack_sip.h |   9 +-
 net/ipv4/netfilter/Kconfig                 |   5 -
 net/ipv4/netfilter/Makefile                |   1 -
 net/ipv4/netfilter/nf_nat_sip.c            | 580 ---------------------------
 net/netfilter/Kconfig                      |   5 +
 net/netfilter/Makefile                     |   1 +
 net/netfilter/nf_conntrack_sip.c           |  68 ++--
 net/netfilter/nf_nat_sip.c                 | 609 +++++++++++++++++++++++++++++
 8 files changed, 653 insertions(+), 625 deletions(-)
 delete mode 100644 net/ipv4/netfilter/nf_nat_sip.c
 create mode 100644 net/netfilter/nf_nat_sip.c

(limited to 'include')

diff --git a/include/linux/netfilter/nf_conntrack_sip.h b/include/linux/netfilter/nf_conntrack_sip.h
index 1afc669a393e..387bdd02945d 100644
--- a/include/linux/netfilter/nf_conntrack_sip.h
+++ b/include/linux/netfilter/nf_conntrack_sip.h
@@ -99,10 +99,8 @@ enum sip_header_types {
 enum sdp_header_types {
 	SDP_HDR_UNSPEC,
 	SDP_HDR_VERSION,
-	SDP_HDR_OWNER_IP4,
-	SDP_HDR_CONNECTION_IP4,
-	SDP_HDR_OWNER_IP6,
-	SDP_HDR_CONNECTION_IP6,
+	SDP_HDR_OWNER,
+	SDP_HDR_CONNECTION,
 	SDP_HDR_MEDIA,
 };
 
@@ -111,7 +109,8 @@ extern unsigned int (*nf_nat_sip_hook)(struct sk_buff *skb,
 				       unsigned int dataoff,
 				       const char **dptr,
 				       unsigned int *datalen);
-extern void (*nf_nat_sip_seq_adjust_hook)(struct sk_buff *skb, s16 off);
+extern void (*nf_nat_sip_seq_adjust_hook)(struct sk_buff *skb,
+					  unsigned int protoff, s16 off);
 extern unsigned int (*nf_nat_sip_expect_hook)(struct sk_buff *skb,
 					      unsigned int protoff,
 					      unsigned int dataoff,
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index 52c4a87007aa..30197f8003be 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -242,11 +242,6 @@ config NF_NAT_H323
 	depends on NF_CONNTRACK && NF_NAT_IPV4
 	default NF_NAT_IPV4 && NF_CONNTRACK_H323
 
-config NF_NAT_SIP
-	tristate
-	depends on NF_CONNTRACK && NF_NAT_IPV4
-	default NF_NAT_IPV4 && NF_CONNTRACK_SIP
-
 # mangle + specific targets
 config IP_NF_MANGLE
 	tristate "Packet mangling"
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index 8baa496f6a4e..8914abffc96d 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -23,7 +23,6 @@ obj-$(CONFIG_NF_DEFRAG_IPV4) += nf_defrag_ipv4.o
 obj-$(CONFIG_NF_NAT_H323) += nf_nat_h323.o
 obj-$(CONFIG_NF_NAT_IRC) += nf_nat_irc.o
 obj-$(CONFIG_NF_NAT_PPTP) += nf_nat_pptp.o
-obj-$(CONFIG_NF_NAT_SIP) += nf_nat_sip.o
 obj-$(CONFIG_NF_NAT_SNMP_BASIC) += nf_nat_snmp_basic.o
 obj-$(CONFIG_NF_NAT_TFTP) += nf_nat_tftp.o
 
diff --git a/net/ipv4/netfilter/nf_nat_sip.c b/net/ipv4/netfilter/nf_nat_sip.c
deleted file mode 100644
index 47a47186a791..000000000000
--- a/net/ipv4/netfilter/nf_nat_sip.c
+++ /dev/null
@@ -1,580 +0,0 @@
-/* SIP extension for NAT alteration.
- *
- * (C) 2005 by Christian Hentschel <chentschel@arnet.com.ar>
- * based on RR's ip_nat_ftp.c and other modules.
- * (C) 2007 United Security Providers
- * (C) 2007, 2008 Patrick McHardy <kaber@trash.net>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/ip.h>
-#include <net/ip.h>
-#include <linux/udp.h>
-#include <linux/tcp.h>
-
-#include <net/netfilter/nf_nat.h>
-#include <net/netfilter/nf_nat_helper.h>
-#include <net/netfilter/nf_conntrack_helper.h>
-#include <net/netfilter/nf_conntrack_expect.h>
-#include <linux/netfilter/nf_conntrack_sip.h>
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Christian Hentschel <chentschel@arnet.com.ar>");
-MODULE_DESCRIPTION("SIP NAT helper");
-MODULE_ALIAS("ip_nat_sip");
-
-
-static unsigned int mangle_packet(struct sk_buff *skb, unsigned int protoff,
-				  unsigned int dataoff,
-				  const char **dptr, unsigned int *datalen,
-				  unsigned int matchoff, unsigned int matchlen,
-				  const char *buffer, unsigned int buflen)
-{
-	enum ip_conntrack_info ctinfo;
-	struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
-	struct tcphdr *th;
-	unsigned int baseoff;
-
-	if (nf_ct_protonum(ct) == IPPROTO_TCP) {
-		th = (struct tcphdr *)(skb->data + ip_hdrlen(skb));
-		baseoff = ip_hdrlen(skb) + th->doff * 4;
-		matchoff += dataoff - baseoff;
-
-		if (!__nf_nat_mangle_tcp_packet(skb, ct, ctinfo,
-						protoff, matchoff, matchlen,
-						buffer, buflen, false))
-			return 0;
-	} else {
-		baseoff = ip_hdrlen(skb) + sizeof(struct udphdr);
-		matchoff += dataoff - baseoff;
-
-		if (!nf_nat_mangle_udp_packet(skb, ct, ctinfo,
-					      protoff, matchoff, matchlen,
-					      buffer, buflen))
-			return 0;
-	}
-
-	/* Reload data pointer and adjust datalen value */
-	*dptr = skb->data + dataoff;
-	*datalen += buflen - matchlen;
-	return 1;
-}
-
-static int map_addr(struct sk_buff *skb, unsigned int protoff,
-		    unsigned int dataoff,
-		    const char **dptr, unsigned int *datalen,
-		    unsigned int matchoff, unsigned int matchlen,
-		    union nf_inet_addr *addr, __be16 port)
-{
-	enum ip_conntrack_info ctinfo;
-	struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
-	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
-	char buffer[sizeof("nnn.nnn.nnn.nnn:nnnnn")];
-	unsigned int buflen;
-	__be32 newaddr;
-	__be16 newport;
-
-	if (ct->tuplehash[dir].tuple.src.u3.ip == addr->ip &&
-	    ct->tuplehash[dir].tuple.src.u.udp.port == port) {
-		newaddr = ct->tuplehash[!dir].tuple.dst.u3.ip;
-		newport = ct->tuplehash[!dir].tuple.dst.u.udp.port;
-	} else if (ct->tuplehash[dir].tuple.dst.u3.ip == addr->ip &&
-		   ct->tuplehash[dir].tuple.dst.u.udp.port == port) {
-		newaddr = ct->tuplehash[!dir].tuple.src.u3.ip;
-		newport = ct->tuplehash[!dir].tuple.src.u.udp.port;
-	} else
-		return 1;
-
-	if (newaddr == addr->ip && newport == port)
-		return 1;
-
-	buflen = sprintf(buffer, "%pI4:%u", &newaddr, ntohs(newport));
-
-	return mangle_packet(skb, protoff, dataoff, dptr, datalen,
-			     matchoff, matchlen, buffer, buflen);
-}
-
-static int map_sip_addr(struct sk_buff *skb, unsigned int protoff,
-			unsigned int dataoff,
-			const char **dptr, unsigned int *datalen,
-			enum sip_header_types type)
-{
-	enum ip_conntrack_info ctinfo;
-	struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
-	unsigned int matchlen, matchoff;
-	union nf_inet_addr addr;
-	__be16 port;
-
-	if (ct_sip_parse_header_uri(ct, *dptr, NULL, *datalen, type, NULL,
-				    &matchoff, &matchlen, &addr, &port) <= 0)
-		return 1;
-	return map_addr(skb, protoff, dataoff, dptr, datalen,
-			matchoff, matchlen, &addr, port);
-}
-
-static unsigned int ip_nat_sip(struct sk_buff *skb, unsigned int protoff,
-			       unsigned int dataoff,
-			       const char **dptr, unsigned int *datalen)
-{
-	enum ip_conntrack_info ctinfo;
-	struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
-	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
-	unsigned int coff, matchoff, matchlen;
-	enum sip_header_types hdr;
-	union nf_inet_addr addr;
-	__be16 port;
-	int request, in_header;
-
-	/* Basic rules: requests and responses. */
-	if (strnicmp(*dptr, "SIP/2.0", strlen("SIP/2.0")) != 0) {
-		if (ct_sip_parse_request(ct, *dptr, *datalen,
-					 &matchoff, &matchlen,
-					 &addr, &port) > 0 &&
-		    !map_addr(skb, protoff, dataoff, dptr, datalen,
-			      matchoff, matchlen, &addr, port))
-			return NF_DROP;
-		request = 1;
-	} else
-		request = 0;
-
-	if (nf_ct_protonum(ct) == IPPROTO_TCP)
-		hdr = SIP_HDR_VIA_TCP;
-	else
-		hdr = SIP_HDR_VIA_UDP;
-
-	/* Translate topmost Via header and parameters */
-	if (ct_sip_parse_header_uri(ct, *dptr, NULL, *datalen,
-				    hdr, NULL, &matchoff, &matchlen,
-				    &addr, &port) > 0) {
-		unsigned int olen, matchend, poff, plen, buflen, n;
-		char buffer[sizeof("nnn.nnn.nnn.nnn:nnnnn")];
-
-		/* We're only interested in headers related to this
-		 * connection */
-		if (request) {
-			if (addr.ip != ct->tuplehash[dir].tuple.src.u3.ip ||
-			    port != ct->tuplehash[dir].tuple.src.u.udp.port)
-				goto next;
-		} else {
-			if (addr.ip != ct->tuplehash[dir].tuple.dst.u3.ip ||
-			    port != ct->tuplehash[dir].tuple.dst.u.udp.port)
-				goto next;
-		}
-
-		olen = *datalen;
-		if (!map_addr(skb, protoff, dataoff, dptr, datalen,
-			      matchoff, matchlen, &addr, port))
-			return NF_DROP;
-
-		matchend = matchoff + matchlen + *datalen - olen;
-
-		/* The maddr= parameter (RFC 2361) specifies where to send
-		 * the reply. */
-		if (ct_sip_parse_address_param(ct, *dptr, matchend, *datalen,
-					       "maddr=", &poff, &plen,
-					       &addr, true) > 0 &&
-		    addr.ip == ct->tuplehash[dir].tuple.src.u3.ip &&
-		    addr.ip != ct->tuplehash[!dir].tuple.dst.u3.ip) {
-			buflen = sprintf(buffer, "%pI4",
-					&ct->tuplehash[!dir].tuple.dst.u3.ip);
-			if (!mangle_packet(skb, protoff, dataoff, dptr, datalen,
-					   poff, plen, buffer, buflen))
-				return NF_DROP;
-		}
-
-		/* The received= parameter (RFC 2361) contains the address
-		 * from which the server received the request. */
-		if (ct_sip_parse_address_param(ct, *dptr, matchend, *datalen,
-					       "received=", &poff, &plen,
-					       &addr, false) > 0 &&
-		    addr.ip == ct->tuplehash[dir].tuple.dst.u3.ip &&
-		    addr.ip != ct->tuplehash[!dir].tuple.src.u3.ip) {
-			buflen = sprintf(buffer, "%pI4",
-					&ct->tuplehash[!dir].tuple.src.u3.ip);
-			if (!mangle_packet(skb, protoff, dataoff, dptr, datalen,
-					   poff, plen, buffer, buflen))
-				return NF_DROP;
-		}
-
-		/* The rport= parameter (RFC 3581) contains the port number
-		 * from which the server received the request. */
-		if (ct_sip_parse_numerical_param(ct, *dptr, matchend, *datalen,
-						 "rport=", &poff, &plen,
-						 &n) > 0 &&
-		    htons(n) == ct->tuplehash[dir].tuple.dst.u.udp.port &&
-		    htons(n) != ct->tuplehash[!dir].tuple.src.u.udp.port) {
-			__be16 p = ct->tuplehash[!dir].tuple.src.u.udp.port;
-			buflen = sprintf(buffer, "%u", ntohs(p));
-			if (!mangle_packet(skb, protoff, dataoff, dptr, datalen,
-					   poff, plen, buffer, buflen))
-				return NF_DROP;
-		}
-	}
-
-next:
-	/* Translate Contact headers */
-	coff = 0;
-	in_header = 0;
-	while (ct_sip_parse_header_uri(ct, *dptr, &coff, *datalen,
-				       SIP_HDR_CONTACT, &in_header,
-				       &matchoff, &matchlen,
-				       &addr, &port) > 0) {
-		if (!map_addr(skb, protoff, dataoff, dptr, datalen,
-			      matchoff, matchlen,
-			      &addr, port))
-			return NF_DROP;
-	}
-
-	if (!map_sip_addr(skb, protoff, dataoff, dptr, datalen, SIP_HDR_FROM) ||
-	    !map_sip_addr(skb, protoff, dataoff, dptr, datalen, SIP_HDR_TO))
-		return NF_DROP;
-
-	return NF_ACCEPT;
-}
-
-static void ip_nat_sip_seq_adjust(struct sk_buff *skb, s16 off)
-{
-	enum ip_conntrack_info ctinfo;
-	struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
-	const struct tcphdr *th;
-
-	if (nf_ct_protonum(ct) != IPPROTO_TCP || off == 0)
-		return;
-
-	th = (struct tcphdr *)(skb->data + ip_hdrlen(skb));
-	nf_nat_set_seq_adjust(ct, ctinfo, th->seq, off);
-}
-
-/* Handles expected signalling connections and media streams */
-static void ip_nat_sip_expected(struct nf_conn *ct,
-				struct nf_conntrack_expect *exp)
-{
-	struct nf_nat_range range;
-
-	/* This must be a fresh one. */
-	BUG_ON(ct->status & IPS_NAT_DONE_MASK);
-
-	/* For DST manip, map port here to where it's expected. */
-	range.flags = (NF_NAT_RANGE_MAP_IPS | NF_NAT_RANGE_PROTO_SPECIFIED);
-	range.min_proto = range.max_proto = exp->saved_proto;
-	range.min_addr = range.max_addr = exp->saved_addr;
-	nf_nat_setup_info(ct, &range, NF_NAT_MANIP_DST);
-
-	/* Change src to where master sends to, but only if the connection
-	 * actually came from the same source. */
-	if (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip ==
-	    ct->master->tuplehash[exp->dir].tuple.src.u3.ip) {
-		range.flags = NF_NAT_RANGE_MAP_IPS;
-		range.min_addr = range.max_addr
-			= ct->master->tuplehash[!exp->dir].tuple.dst.u3;
-		nf_nat_setup_info(ct, &range, NF_NAT_MANIP_SRC);
-	}
-}
-
-static unsigned int ip_nat_sip_expect(struct sk_buff *skb, unsigned int protoff,
-				      unsigned int dataoff,
-				      const char **dptr, unsigned int *datalen,
-				      struct nf_conntrack_expect *exp,
-				      unsigned int matchoff,
-				      unsigned int matchlen)
-{
-	enum ip_conntrack_info ctinfo;
-	struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
-	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
-	__be32 newip;
-	u_int16_t port;
-	char buffer[sizeof("nnn.nnn.nnn.nnn:nnnnn")];
-	unsigned int buflen;
-
-	/* Connection will come from reply */
-	if (ct->tuplehash[dir].tuple.src.u3.ip == ct->tuplehash[!dir].tuple.dst.u3.ip)
-		newip = exp->tuple.dst.u3.ip;
-	else
-		newip = ct->tuplehash[!dir].tuple.dst.u3.ip;
-
-	/* If the signalling port matches the connection's source port in the
-	 * original direction, try to use the destination port in the opposite
-	 * direction. */
-	if (exp->tuple.dst.u.udp.port ==
-	    ct->tuplehash[dir].tuple.src.u.udp.port)
-		port = ntohs(ct->tuplehash[!dir].tuple.dst.u.udp.port);
-	else
-		port = ntohs(exp->tuple.dst.u.udp.port);
-
-	exp->saved_addr = exp->tuple.dst.u3;
-	exp->tuple.dst.u3.ip = newip;
-	exp->saved_proto.udp.port = exp->tuple.dst.u.udp.port;
-	exp->dir = !dir;
-	exp->expectfn = ip_nat_sip_expected;
-
-	for (; port != 0; port++) {
-		int ret;
-
-		exp->tuple.dst.u.udp.port = htons(port);
-		ret = nf_ct_expect_related(exp);
-		if (ret == 0)
-			break;
-		else if (ret != -EBUSY) {
-			port = 0;
-			break;
-		}
-	}
-
-	if (port == 0)
-		return NF_DROP;
-
-	if (exp->tuple.dst.u3.ip != exp->saved_addr.ip ||
-	    exp->tuple.dst.u.udp.port != exp->saved_proto.udp.port) {
-		buflen = sprintf(buffer, "%pI4:%u", &newip, port);
-		if (!mangle_packet(skb, protoff, dataoff, dptr, datalen,
-				   matchoff, matchlen, buffer, buflen))
-			goto err;
-	}
-	return NF_ACCEPT;
-
-err:
-	nf_ct_unexpect_related(exp);
-	return NF_DROP;
-}
-
-static int mangle_content_len(struct sk_buff *skb, unsigned int protoff,
-			      unsigned int dataoff,
-			      const char **dptr, unsigned int *datalen)
-{
-	enum ip_conntrack_info ctinfo;
-	struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
-	unsigned int matchoff, matchlen;
-	char buffer[sizeof("65536")];
-	int buflen, c_len;
-
-	/* Get actual SDP length */
-	if (ct_sip_get_sdp_header(ct, *dptr, 0, *datalen,
-				  SDP_HDR_VERSION, SDP_HDR_UNSPEC,
-				  &matchoff, &matchlen) <= 0)
-		return 0;
-	c_len = *datalen - matchoff + strlen("v=");
-
-	/* Now, update SDP length */
-	if (ct_sip_get_header(ct, *dptr, 0, *datalen, SIP_HDR_CONTENT_LENGTH,
-			      &matchoff, &matchlen) <= 0)
-		return 0;
-
-	buflen = sprintf(buffer, "%u", c_len);
-	return mangle_packet(skb, protoff, dataoff, dptr, datalen,
-			     matchoff, matchlen, buffer, buflen);
-}
-
-static int mangle_sdp_packet(struct sk_buff *skb, unsigned int protoff,
-			     unsigned int dataoff,
-			     const char **dptr, unsigned int *datalen,
-			     unsigned int sdpoff,
-			     enum sdp_header_types type,
-			     enum sdp_header_types term,
-			     char *buffer, int buflen)
-{
-	enum ip_conntrack_info ctinfo;
-	struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
-	unsigned int matchlen, matchoff;
-
-	if (ct_sip_get_sdp_header(ct, *dptr, sdpoff, *datalen, type, term,
-				  &matchoff, &matchlen) <= 0)
-		return -ENOENT;
-	return mangle_packet(skb, protoff, dataoff, dptr, datalen,
-			     matchoff, matchlen, buffer, buflen) ? 0 : -EINVAL;
-}
-
-static unsigned int ip_nat_sdp_addr(struct sk_buff *skb, unsigned int protoff,
-				    unsigned int dataoff,
-				    const char **dptr, unsigned int *datalen,
-				    unsigned int sdpoff,
-				    enum sdp_header_types type,
-				    enum sdp_header_types term,
-				    const union nf_inet_addr *addr)
-{
-	char buffer[sizeof("nnn.nnn.nnn.nnn")];
-	unsigned int buflen;
-
-	buflen = sprintf(buffer, "%pI4", &addr->ip);
-	if (mangle_sdp_packet(skb, protoff, dataoff, dptr, datalen,
-			      sdpoff, type, term, buffer, buflen))
-		return 0;
-
-	return mangle_content_len(skb, protoff, dataoff, dptr, datalen);
-}
-
-static unsigned int ip_nat_sdp_port(struct sk_buff *skb, unsigned int protoff,
-				    unsigned int dataoff,
-				    const char **dptr, unsigned int *datalen,
-				    unsigned int matchoff,
-				    unsigned int matchlen,
-				    u_int16_t port)
-{
-	char buffer[sizeof("nnnnn")];
-	unsigned int buflen;
-
-	buflen = sprintf(buffer, "%u", port);
-	if (!mangle_packet(skb, protoff, dataoff, dptr, datalen,
-			   matchoff, matchlen, buffer, buflen))
-		return 0;
-
-	return mangle_content_len(skb, protoff, dataoff, dptr, datalen);
-}
-
-static unsigned int ip_nat_sdp_session(struct sk_buff *skb, unsigned int protoff,
-				       unsigned int dataoff,
-				       const char **dptr, unsigned int *datalen,
-				       unsigned int sdpoff,
-				       const union nf_inet_addr *addr)
-{
-	char buffer[sizeof("nnn.nnn.nnn.nnn")];
-	unsigned int buflen;
-
-	/* Mangle session description owner and contact addresses */
-	buflen = sprintf(buffer, "%pI4", &addr->ip);
-	if (mangle_sdp_packet(skb, protoff, dataoff, dptr, datalen, sdpoff,
-			       SDP_HDR_OWNER_IP4, SDP_HDR_MEDIA,
-			       buffer, buflen))
-		return 0;
-
-	switch (mangle_sdp_packet(skb, protoff, dataoff, dptr, datalen, sdpoff,
-				  SDP_HDR_CONNECTION_IP4, SDP_HDR_MEDIA,
-				  buffer, buflen)) {
-	case 0:
-	/*
-	 * RFC 2327:
-	 *
-	 * Session description
-	 *
-	 * c=* (connection information - not required if included in all media)
-	 */
-	case -ENOENT:
-		break;
-	default:
-		return 0;
-	}
-
-	return mangle_content_len(skb, protoff, dataoff, dptr, datalen);
-}
-
-/* So, this packet has hit the connection tracking matching code.
-   Mangle it, and change the expectation to match the new version. */
-static unsigned int ip_nat_sdp_media(struct sk_buff *skb, unsigned int protoff,
-				     unsigned int dataoff,
-				     const char **dptr, unsigned int *datalen,
-				     struct nf_conntrack_expect *rtp_exp,
-				     struct nf_conntrack_expect *rtcp_exp,
-				     unsigned int mediaoff,
-				     unsigned int medialen,
-				     union nf_inet_addr *rtp_addr)
-{
-	enum ip_conntrack_info ctinfo;
-	struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
-	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
-	u_int16_t port;
-
-	/* Connection will come from reply */
-	if (ct->tuplehash[dir].tuple.src.u3.ip ==
-	    ct->tuplehash[!dir].tuple.dst.u3.ip)
-		rtp_addr->ip = rtp_exp->tuple.dst.u3.ip;
-	else
-		rtp_addr->ip = ct->tuplehash[!dir].tuple.dst.u3.ip;
-
-	rtp_exp->saved_addr = rtp_exp->tuple.dst.u3;
-	rtp_exp->tuple.dst.u3.ip = rtp_addr->ip;
-	rtp_exp->saved_proto.udp.port = rtp_exp->tuple.dst.u.udp.port;
-	rtp_exp->dir = !dir;
-	rtp_exp->expectfn = ip_nat_sip_expected;
-
-	rtcp_exp->saved_addr = rtcp_exp->tuple.dst.u3;
-	rtcp_exp->tuple.dst.u3.ip = rtp_addr->ip;
-	rtcp_exp->saved_proto.udp.port = rtcp_exp->tuple.dst.u.udp.port;
-	rtcp_exp->dir = !dir;
-	rtcp_exp->expectfn = ip_nat_sip_expected;
-
-	/* Try to get same pair of ports: if not, try to change them. */
-	for (port = ntohs(rtp_exp->tuple.dst.u.udp.port);
-	     port != 0; port += 2) {
-		int ret;
-
-		rtp_exp->tuple.dst.u.udp.port = htons(port);
-		ret = nf_ct_expect_related(rtp_exp);
-		if (ret == -EBUSY)
-			continue;
-		else if (ret < 0) {
-			port = 0;
-			break;
-		}
-		rtcp_exp->tuple.dst.u.udp.port = htons(port + 1);
-		ret = nf_ct_expect_related(rtcp_exp);
-		if (ret == 0)
-			break;
-		else if (ret != -EBUSY) {
-			nf_ct_unexpect_related(rtp_exp);
-			port = 0;
-			break;
-		}
-	}
-
-	if (port == 0)
-		goto err1;
-
-	/* Update media port. */
-	if (rtp_exp->tuple.dst.u.udp.port != rtp_exp->saved_proto.udp.port &&
-	    !ip_nat_sdp_port(skb, protoff, dataoff, dptr, datalen,
-			     mediaoff, medialen, port))
-		goto err2;
-
-	return NF_ACCEPT;
-
-err2:
-	nf_ct_unexpect_related(rtp_exp);
-	nf_ct_unexpect_related(rtcp_exp);
-err1:
-	return NF_DROP;
-}
-
-static struct nf_ct_helper_expectfn sip_nat = {
-        .name           = "sip",
-        .expectfn       = ip_nat_sip_expected,
-};
-
-static void __exit nf_nat_sip_fini(void)
-{
-	RCU_INIT_POINTER(nf_nat_sip_hook, NULL);
-	RCU_INIT_POINTER(nf_nat_sip_seq_adjust_hook, NULL);
-	RCU_INIT_POINTER(nf_nat_sip_expect_hook, NULL);
-	RCU_INIT_POINTER(nf_nat_sdp_addr_hook, NULL);
-	RCU_INIT_POINTER(nf_nat_sdp_port_hook, NULL);
-	RCU_INIT_POINTER(nf_nat_sdp_session_hook, NULL);
-	RCU_INIT_POINTER(nf_nat_sdp_media_hook, NULL);
-	nf_ct_helper_expectfn_unregister(&sip_nat);
-	synchronize_rcu();
-}
-
-static int __init nf_nat_sip_init(void)
-{
-	BUG_ON(nf_nat_sip_hook != NULL);
-	BUG_ON(nf_nat_sip_seq_adjust_hook != NULL);
-	BUG_ON(nf_nat_sip_expect_hook != NULL);
-	BUG_ON(nf_nat_sdp_addr_hook != NULL);
-	BUG_ON(nf_nat_sdp_port_hook != NULL);
-	BUG_ON(nf_nat_sdp_session_hook != NULL);
-	BUG_ON(nf_nat_sdp_media_hook != NULL);
-	RCU_INIT_POINTER(nf_nat_sip_hook, ip_nat_sip);
-	RCU_INIT_POINTER(nf_nat_sip_seq_adjust_hook, ip_nat_sip_seq_adjust);
-	RCU_INIT_POINTER(nf_nat_sip_expect_hook, ip_nat_sip_expect);
-	RCU_INIT_POINTER(nf_nat_sdp_addr_hook, ip_nat_sdp_addr);
-	RCU_INIT_POINTER(nf_nat_sdp_port_hook, ip_nat_sdp_port);
-	RCU_INIT_POINTER(nf_nat_sdp_session_hook, ip_nat_sdp_session);
-	RCU_INIT_POINTER(nf_nat_sdp_media_hook, ip_nat_sdp_media);
-	nf_ct_helper_expectfn_register(&sip_nat);
-	return 0;
-}
-
-module_init(nf_nat_sip_init);
-module_exit(nf_nat_sip_fini);
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 2eee9f1f99ef..bf3e4649efb2 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -390,6 +390,11 @@ config NF_NAT_FTP
 	depends on NF_CONNTRACK && NF_NAT
 	default NF_NAT && NF_CONNTRACK_FTP
 
+config NF_NAT_SIP
+	tristate
+	depends on NF_CONNTRACK && NF_NAT
+	default NF_NAT && NF_CONNTRACK_SIP
+
 endif # NF_CONNTRACK
 
 # transparent proxy support
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 7d6e1ea14c9b..7d6d1a035f31 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -57,6 +57,7 @@ obj-$(CONFIG_NF_NAT_PROTO_SCTP) += nf_nat_proto_sctp.o
 # NAT helpers
 obj-$(CONFIG_NF_NAT_AMANDA) += nf_nat_amanda.o
 obj-$(CONFIG_NF_NAT_FTP) += nf_nat_ftp.o
+obj-$(CONFIG_NF_NAT_SIP) += nf_nat_sip.o
 
 # transparent proxy support
 obj-$(CONFIG_NETFILTER_TPROXY) += nf_tproxy_core.o
diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c
index d5174902db37..df8f4f284481 100644
--- a/net/netfilter/nf_conntrack_sip.c
+++ b/net/netfilter/nf_conntrack_sip.c
@@ -57,7 +57,8 @@ unsigned int (*nf_nat_sip_hook)(struct sk_buff *skb, unsigned int protoff,
 				unsigned int *datalen) __read_mostly;
 EXPORT_SYMBOL_GPL(nf_nat_sip_hook);
 
-void (*nf_nat_sip_seq_adjust_hook)(struct sk_buff *skb, s16 off) __read_mostly;
+void (*nf_nat_sip_seq_adjust_hook)(struct sk_buff *skb, unsigned int protoff,
+				   s16 off) __read_mostly;
 EXPORT_SYMBOL_GPL(nf_nat_sip_seq_adjust_hook);
 
 unsigned int (*nf_nat_sip_expect_hook)(struct sk_buff *skb,
@@ -742,13 +743,18 @@ static int sdp_addr_len(const struct nf_conn *ct, const char *dptr,
  * be tolerant and also accept records terminated with a single newline
  * character". We handle both cases.
  */
-static const struct sip_header ct_sdp_hdrs[] = {
-	[SDP_HDR_VERSION]		= SDP_HDR("v=", NULL, digits_len),
-	[SDP_HDR_OWNER_IP4]		= SDP_HDR("o=", "IN IP4 ", sdp_addr_len),
-	[SDP_HDR_CONNECTION_IP4]	= SDP_HDR("c=", "IN IP4 ", sdp_addr_len),
-	[SDP_HDR_OWNER_IP6]		= SDP_HDR("o=", "IN IP6 ", sdp_addr_len),
-	[SDP_HDR_CONNECTION_IP6]	= SDP_HDR("c=", "IN IP6 ", sdp_addr_len),
-	[SDP_HDR_MEDIA]			= SDP_HDR("m=", NULL, media_len),
+static const struct sip_header ct_sdp_hdrs_v4[] = {
+	[SDP_HDR_VERSION]	= SDP_HDR("v=", NULL, digits_len),
+	[SDP_HDR_OWNER]		= SDP_HDR("o=", "IN IP4 ", sdp_addr_len),
+	[SDP_HDR_CONNECTION]	= SDP_HDR("c=", "IN IP4 ", sdp_addr_len),
+	[SDP_HDR_MEDIA]		= SDP_HDR("m=", NULL, media_len),
+};
+
+static const struct sip_header ct_sdp_hdrs_v6[] = {
+	[SDP_HDR_VERSION]	= SDP_HDR("v=", NULL, digits_len),
+	[SDP_HDR_OWNER]		= SDP_HDR("o=", "IN IP6 ", sdp_addr_len),
+	[SDP_HDR_CONNECTION]	= SDP_HDR("c=", "IN IP6 ", sdp_addr_len),
+	[SDP_HDR_MEDIA]		= SDP_HDR("m=", NULL, media_len),
 };
 
 /* Linear string search within SDP header values */
@@ -774,11 +780,14 @@ int ct_sip_get_sdp_header(const struct nf_conn *ct, const char *dptr,
 			  enum sdp_header_types term,
 			  unsigned int *matchoff, unsigned int *matchlen)
 {
-	const struct sip_header *hdr = &ct_sdp_hdrs[type];
-	const struct sip_header *thdr = &ct_sdp_hdrs[term];
+	const struct sip_header *hdrs, *hdr, *thdr;
 	const char *start = dptr, *limit = dptr + datalen;
 	int shift = 0;
 
+	hdrs = nf_ct_l3num(ct) == NFPROTO_IPV4 ? ct_sdp_hdrs_v4 : ct_sdp_hdrs_v6;
+	hdr = &hdrs[type];
+	thdr = &hdrs[term];
+
 	for (dptr += dataoff; dptr < limit; dptr++) {
 		/* Find beginning of line */
 		if (*dptr != '\r' && *dptr != '\n')
@@ -945,12 +954,12 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int protoff,
 		    exp->class != class)
 			break;
 #ifdef CONFIG_NF_NAT_NEEDED
-		if (exp->tuple.src.l3num == AF_INET && !direct_rtp &&
-		    (exp->saved_addr.ip != exp->tuple.dst.u3.ip ||
+		if (!direct_rtp &&
+		    (!nf_inet_addr_cmp(&exp->saved_addr, &exp->tuple.dst.u3) ||
 		     exp->saved_proto.udp.port != exp->tuple.dst.u.udp.port) &&
 		    ct->status & IPS_NAT_MASK) {
-			daddr->ip		= exp->saved_addr.ip;
-			tuple.dst.u3.ip		= exp->saved_addr.ip;
+			*daddr			= exp->saved_addr;
+			tuple.dst.u3		= exp->saved_addr;
 			tuple.dst.u.udp.port	= exp->saved_proto.udp.port;
 			direct_rtp = 1;
 		} else
@@ -987,8 +996,7 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int protoff,
 			  IPPROTO_UDP, NULL, &rtcp_port);
 
 	nf_nat_sdp_media = rcu_dereference(nf_nat_sdp_media_hook);
-	if (nf_nat_sdp_media && nf_ct_l3num(ct) == NFPROTO_IPV4 &&
-	    ct->status & IPS_NAT_MASK && !direct_rtp)
+	if (nf_nat_sdp_media && ct->status & IPS_NAT_MASK && !direct_rtp)
 		ret = nf_nat_sdp_media(skb, protoff, dataoff, dptr, datalen,
 				       rtp_exp, rtcp_exp,
 				       mediaoff, medialen, daddr);
@@ -1044,15 +1052,12 @@ static int process_sdp(struct sk_buff *skb, unsigned int protoff,
 	unsigned int i;
 	union nf_inet_addr caddr, maddr, rtp_addr;
 	unsigned int port;
-	enum sdp_header_types c_hdr;
 	const struct sdp_media_type *t;
 	int ret = NF_ACCEPT;
 	typeof(nf_nat_sdp_addr_hook) nf_nat_sdp_addr;
 	typeof(nf_nat_sdp_session_hook) nf_nat_sdp_session;
 
 	nf_nat_sdp_addr = rcu_dereference(nf_nat_sdp_addr_hook);
-	c_hdr = nf_ct_l3num(ct) == AF_INET ? SDP_HDR_CONNECTION_IP4 :
-					     SDP_HDR_CONNECTION_IP6;
 
 	/* Find beginning of session description */
 	if (ct_sip_get_sdp_header(ct, *dptr, 0, *datalen,
@@ -1066,7 +1071,7 @@ static int process_sdp(struct sk_buff *skb, unsigned int protoff,
 	 * the end of the session description. */
 	caddr_len = 0;
 	if (ct_sip_parse_sdp_addr(ct, *dptr, sdpoff, *datalen,
-				  c_hdr, SDP_HDR_MEDIA,
+				  SDP_HDR_CONNECTION, SDP_HDR_MEDIA,
 				  &matchoff, &matchlen, &caddr) > 0)
 		caddr_len = matchlen;
 
@@ -1096,7 +1101,7 @@ static int process_sdp(struct sk_buff *skb, unsigned int protoff,
 		/* The media description overrides the session description. */
 		maddr_len = 0;
 		if (ct_sip_parse_sdp_addr(ct, *dptr, mediaoff, *datalen,
-					  c_hdr, SDP_HDR_MEDIA,
+					  SDP_HDR_CONNECTION, SDP_HDR_MEDIA,
 					  &matchoff, &matchlen, &maddr) > 0) {
 			maddr_len = matchlen;
 			memcpy(&rtp_addr, &maddr, sizeof(rtp_addr));
@@ -1113,11 +1118,10 @@ static int process_sdp(struct sk_buff *skb, unsigned int protoff,
 			return ret;
 
 		/* Update media connection address if present */
-		if (maddr_len && nf_nat_sdp_addr &&
-		    nf_ct_l3num(ct) == NFPROTO_IPV4 && ct->status & IPS_NAT_MASK) {
+		if (maddr_len && nf_nat_sdp_addr && ct->status & IPS_NAT_MASK) {
 			ret = nf_nat_sdp_addr(skb, protoff, dataoff,
-					      dptr, datalen,
-					      mediaoff, c_hdr, SDP_HDR_MEDIA,
+					      dptr, datalen, mediaoff,
+					      SDP_HDR_CONNECTION, SDP_HDR_MEDIA,
 					      &rtp_addr);
 			if (ret != NF_ACCEPT)
 				return ret;
@@ -1127,8 +1131,7 @@ static int process_sdp(struct sk_buff *skb, unsigned int protoff,
 
 	/* Update session connection and owner addresses */
 	nf_nat_sdp_session = rcu_dereference(nf_nat_sdp_session_hook);
-	if (nf_nat_sdp_session && nf_ct_l3num(ct) == NFPROTO_IPV4 &&
-	    ct->status & IPS_NAT_MASK)
+	if (nf_nat_sdp_session && ct->status & IPS_NAT_MASK)
 		ret = nf_nat_sdp_session(skb, protoff, dataoff,
 					 dptr, datalen, sdpoff, &rtp_addr);
 
@@ -1293,8 +1296,7 @@ static int process_register_request(struct sk_buff *skb, unsigned int protoff,
 	exp->flags = NF_CT_EXPECT_PERMANENT | NF_CT_EXPECT_INACTIVE;
 
 	nf_nat_sip_expect = rcu_dereference(nf_nat_sip_expect_hook);
-	if (nf_nat_sip_expect && nf_ct_l3num(ct) == NFPROTO_IPV4 &&
-	    ct->status & IPS_NAT_MASK)
+	if (nf_nat_sip_expect && ct->status & IPS_NAT_MASK)
 		ret = nf_nat_sip_expect(skb, protoff, dataoff, dptr, datalen,
 					exp, matchoff, matchlen);
 	else {
@@ -1476,8 +1478,7 @@ static int process_sip_msg(struct sk_buff *skb, struct nf_conn *ct,
 	else
 		ret = process_sip_response(skb, protoff, dataoff, dptr, datalen);
 
-	if (ret == NF_ACCEPT && nf_ct_l3num(ct) == NFPROTO_IPV4 &&
-	    ct->status & IPS_NAT_MASK) {
+	if (ret == NF_ACCEPT && ct->status & IPS_NAT_MASK) {
 		nf_nat_sip = rcu_dereference(nf_nat_sip_hook);
 		if (nf_nat_sip && !nf_nat_sip(skb, protoff, dataoff,
 					      dptr, datalen))
@@ -1560,11 +1561,10 @@ static int sip_help_tcp(struct sk_buff *skb, unsigned int protoff,
 		datalen  = datalen + diff - msglen;
 	}
 
-	if (ret == NF_ACCEPT && nf_ct_l3num(ct) == NFPROTO_IPV4 &&
-	    ct->status & IPS_NAT_MASK) {
+	if (ret == NF_ACCEPT && ct->status & IPS_NAT_MASK) {
 		nf_nat_sip_seq_adjust = rcu_dereference(nf_nat_sip_seq_adjust_hook);
 		if (nf_nat_sip_seq_adjust)
-			nf_nat_sip_seq_adjust(skb, tdiff);
+			nf_nat_sip_seq_adjust(skb, protoff, tdiff);
 	}
 
 	return ret;
diff --git a/net/netfilter/nf_nat_sip.c b/net/netfilter/nf_nat_sip.c
new file mode 100644
index 000000000000..f4db3a7bd285
--- /dev/null
+++ b/net/netfilter/nf_nat_sip.c
@@ -0,0 +1,609 @@
+/* SIP extension for NAT alteration.
+ *
+ * (C) 2005 by Christian Hentschel <chentschel@arnet.com.ar>
+ * based on RR's ip_nat_ftp.c and other modules.
+ * (C) 2007 United Security Providers
+ * (C) 2007, 2008, 2011, 2012 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/inet.h>
+#include <linux/udp.h>
+#include <linux/tcp.h>
+
+#include <net/netfilter/nf_nat.h>
+#include <net/netfilter/nf_nat_helper.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_expect.h>
+#include <linux/netfilter/nf_conntrack_sip.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Christian Hentschel <chentschel@arnet.com.ar>");
+MODULE_DESCRIPTION("SIP NAT helper");
+MODULE_ALIAS("ip_nat_sip");
+
+
+static unsigned int mangle_packet(struct sk_buff *skb, unsigned int protoff,
+				  unsigned int dataoff,
+				  const char **dptr, unsigned int *datalen,
+				  unsigned int matchoff, unsigned int matchlen,
+				  const char *buffer, unsigned int buflen)
+{
+	enum ip_conntrack_info ctinfo;
+	struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
+	struct tcphdr *th;
+	unsigned int baseoff;
+
+	if (nf_ct_protonum(ct) == IPPROTO_TCP) {
+		th = (struct tcphdr *)(skb->data + protoff);
+		baseoff = protoff + th->doff * 4;
+		matchoff += dataoff - baseoff;
+
+		if (!__nf_nat_mangle_tcp_packet(skb, ct, ctinfo,
+						protoff, matchoff, matchlen,
+						buffer, buflen, false))
+			return 0;
+	} else {
+		baseoff = protoff + sizeof(struct udphdr);
+		matchoff += dataoff - baseoff;
+
+		if (!nf_nat_mangle_udp_packet(skb, ct, ctinfo,
+					      protoff, matchoff, matchlen,
+					      buffer, buflen))
+			return 0;
+	}
+
+	/* Reload data pointer and adjust datalen value */
+	*dptr = skb->data + dataoff;
+	*datalen += buflen - matchlen;
+	return 1;
+}
+
+static int sip_sprintf_addr(const struct nf_conn *ct, char *buffer,
+			    const union nf_inet_addr *addr, bool delim)
+{
+	if (nf_ct_l3num(ct) == NFPROTO_IPV4)
+		return sprintf(buffer, "%pI4", &addr->ip);
+	else {
+		if (delim)
+			return sprintf(buffer, "[%pI6c]", &addr->ip6);
+		else
+			return sprintf(buffer, "%pI6c", &addr->ip6);
+	}
+}
+
+static int sip_sprintf_addr_port(const struct nf_conn *ct, char *buffer,
+				 const union nf_inet_addr *addr, u16 port)
+{
+	if (nf_ct_l3num(ct) == NFPROTO_IPV4)
+		return sprintf(buffer, "%pI4:%u", &addr->ip, port);
+	else
+		return sprintf(buffer, "[%pI6c]:%u", &addr->ip6, port);
+}
+
+static int map_addr(struct sk_buff *skb, unsigned int protoff,
+		    unsigned int dataoff,
+		    const char **dptr, unsigned int *datalen,
+		    unsigned int matchoff, unsigned int matchlen,
+		    union nf_inet_addr *addr, __be16 port)
+{
+	enum ip_conntrack_info ctinfo;
+	struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
+	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+	char buffer[INET6_ADDRSTRLEN + sizeof("[]:nnnnn")];
+	unsigned int buflen;
+	union nf_inet_addr newaddr;
+	__be16 newport;
+
+	if (nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.src.u3, addr) &&
+	    ct->tuplehash[dir].tuple.src.u.udp.port == port) {
+		newaddr = ct->tuplehash[!dir].tuple.dst.u3;
+		newport = ct->tuplehash[!dir].tuple.dst.u.udp.port;
+	} else if (nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.dst.u3, addr) &&
+		   ct->tuplehash[dir].tuple.dst.u.udp.port == port) {
+		newaddr = ct->tuplehash[!dir].tuple.src.u3;
+		newport = ct->tuplehash[!dir].tuple.src.u.udp.port;
+	} else
+		return 1;
+
+	if (nf_inet_addr_cmp(&newaddr, addr) && newport == port)
+		return 1;
+
+	buflen = sip_sprintf_addr_port(ct, buffer, &newaddr, ntohs(newport));
+	return mangle_packet(skb, protoff, dataoff, dptr, datalen,
+			     matchoff, matchlen, buffer, buflen);
+}
+
+static int map_sip_addr(struct sk_buff *skb, unsigned int protoff,
+			unsigned int dataoff,
+			const char **dptr, unsigned int *datalen,
+			enum sip_header_types type)
+{
+	enum ip_conntrack_info ctinfo;
+	struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
+	unsigned int matchlen, matchoff;
+	union nf_inet_addr addr;
+	__be16 port;
+
+	if (ct_sip_parse_header_uri(ct, *dptr, NULL, *datalen, type, NULL,
+				    &matchoff, &matchlen, &addr, &port) <= 0)
+		return 1;
+	return map_addr(skb, protoff, dataoff, dptr, datalen,
+			matchoff, matchlen, &addr, port);
+}
+
+static unsigned int nf_nat_sip(struct sk_buff *skb, unsigned int protoff,
+			       unsigned int dataoff,
+			       const char **dptr, unsigned int *datalen)
+{
+	enum ip_conntrack_info ctinfo;
+	struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
+	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+	unsigned int coff, matchoff, matchlen;
+	enum sip_header_types hdr;
+	union nf_inet_addr addr;
+	__be16 port;
+	int request, in_header;
+
+	/* Basic rules: requests and responses. */
+	if (strnicmp(*dptr, "SIP/2.0", strlen("SIP/2.0")) != 0) {
+		if (ct_sip_parse_request(ct, *dptr, *datalen,
+					 &matchoff, &matchlen,
+					 &addr, &port) > 0 &&
+		    !map_addr(skb, protoff, dataoff, dptr, datalen,
+			      matchoff, matchlen, &addr, port))
+			return NF_DROP;
+		request = 1;
+	} else
+		request = 0;
+
+	if (nf_ct_protonum(ct) == IPPROTO_TCP)
+		hdr = SIP_HDR_VIA_TCP;
+	else
+		hdr = SIP_HDR_VIA_UDP;
+
+	/* Translate topmost Via header and parameters */
+	if (ct_sip_parse_header_uri(ct, *dptr, NULL, *datalen,
+				    hdr, NULL, &matchoff, &matchlen,
+				    &addr, &port) > 0) {
+		unsigned int olen, matchend, poff, plen, buflen, n;
+		char buffer[INET6_ADDRSTRLEN + sizeof("[]:nnnnn")];
+
+		/* We're only interested in headers related to this
+		 * connection */
+		if (request) {
+			if (!nf_inet_addr_cmp(&addr,
+					&ct->tuplehash[dir].tuple.src.u3) ||
+			    port != ct->tuplehash[dir].tuple.src.u.udp.port)
+				goto next;
+		} else {
+			if (!nf_inet_addr_cmp(&addr,
+					&ct->tuplehash[dir].tuple.dst.u3) ||
+			    port != ct->tuplehash[dir].tuple.dst.u.udp.port)
+				goto next;
+		}
+
+		olen = *datalen;
+		if (!map_addr(skb, protoff, dataoff, dptr, datalen,
+			      matchoff, matchlen, &addr, port))
+			return NF_DROP;
+
+		matchend = matchoff + matchlen + *datalen - olen;
+
+		/* The maddr= parameter (RFC 2361) specifies where to send
+		 * the reply. */
+		if (ct_sip_parse_address_param(ct, *dptr, matchend, *datalen,
+					       "maddr=", &poff, &plen,
+					       &addr, true) > 0 &&
+		    nf_inet_addr_cmp(&addr, &ct->tuplehash[dir].tuple.src.u3) &&
+		    !nf_inet_addr_cmp(&addr, &ct->tuplehash[!dir].tuple.dst.u3)) {
+			buflen = sip_sprintf_addr(ct, buffer,
+					&ct->tuplehash[!dir].tuple.dst.u3,
+					true);
+			if (!mangle_packet(skb, protoff, dataoff, dptr, datalen,
+					   poff, plen, buffer, buflen))
+				return NF_DROP;
+		}
+
+		/* The received= parameter (RFC 2361) contains the address
+		 * from which the server received the request. */
+		if (ct_sip_parse_address_param(ct, *dptr, matchend, *datalen,
+					       "received=", &poff, &plen,
+					       &addr, false) > 0 &&
+		    nf_inet_addr_cmp(&addr, &ct->tuplehash[dir].tuple.dst.u3) &&
+		    !nf_inet_addr_cmp(&addr, &ct->tuplehash[!dir].tuple.src.u3)) {
+			buflen = sip_sprintf_addr(ct, buffer,
+					&ct->tuplehash[!dir].tuple.src.u3,
+					false);
+			if (!mangle_packet(skb, protoff, dataoff, dptr, datalen,
+					   poff, plen, buffer, buflen))
+				return NF_DROP;
+		}
+
+		/* The rport= parameter (RFC 3581) contains the port number
+		 * from which the server received the request. */
+		if (ct_sip_parse_numerical_param(ct, *dptr, matchend, *datalen,
+						 "rport=", &poff, &plen,
+						 &n) > 0 &&
+		    htons(n) == ct->tuplehash[dir].tuple.dst.u.udp.port &&
+		    htons(n) != ct->tuplehash[!dir].tuple.src.u.udp.port) {
+			__be16 p = ct->tuplehash[!dir].tuple.src.u.udp.port;
+			buflen = sprintf(buffer, "%u", ntohs(p));
+			if (!mangle_packet(skb, protoff, dataoff, dptr, datalen,
+					   poff, plen, buffer, buflen))
+				return NF_DROP;
+		}
+	}
+
+next:
+	/* Translate Contact headers */
+	coff = 0;
+	in_header = 0;
+	while (ct_sip_parse_header_uri(ct, *dptr, &coff, *datalen,
+				       SIP_HDR_CONTACT, &in_header,
+				       &matchoff, &matchlen,
+				       &addr, &port) > 0) {
+		if (!map_addr(skb, protoff, dataoff, dptr, datalen,
+			      matchoff, matchlen,
+			      &addr, port))
+			return NF_DROP;
+	}
+
+	if (!map_sip_addr(skb, protoff, dataoff, dptr, datalen, SIP_HDR_FROM) ||
+	    !map_sip_addr(skb, protoff, dataoff, dptr, datalen, SIP_HDR_TO))
+		return NF_DROP;
+
+	return NF_ACCEPT;
+}
+
+static void nf_nat_sip_seq_adjust(struct sk_buff *skb, unsigned int protoff,
+				  s16 off)
+{
+	enum ip_conntrack_info ctinfo;
+	struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
+	const struct tcphdr *th;
+
+	if (nf_ct_protonum(ct) != IPPROTO_TCP || off == 0)
+		return;
+
+	th = (struct tcphdr *)(skb->data + protoff);
+	nf_nat_set_seq_adjust(ct, ctinfo, th->seq, off);
+}
+
+/* Handles expected signalling connections and media streams */
+static void nf_nat_sip_expected(struct nf_conn *ct,
+				struct nf_conntrack_expect *exp)
+{
+	struct nf_nat_range range;
+
+	/* This must be a fresh one. */
+	BUG_ON(ct->status & IPS_NAT_DONE_MASK);
+
+	/* For DST manip, map port here to where it's expected. */
+	range.flags = (NF_NAT_RANGE_MAP_IPS | NF_NAT_RANGE_PROTO_SPECIFIED);
+	range.min_proto = range.max_proto = exp->saved_proto;
+	range.min_addr = range.max_addr = exp->saved_addr;
+	nf_nat_setup_info(ct, &range, NF_NAT_MANIP_DST);
+
+	/* Change src to where master sends to, but only if the connection
+	 * actually came from the same source. */
+	if (nf_inet_addr_cmp(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3,
+			     &ct->master->tuplehash[exp->dir].tuple.src.u3)) {
+		range.flags = NF_NAT_RANGE_MAP_IPS;
+		range.min_addr = range.max_addr
+			= ct->master->tuplehash[!exp->dir].tuple.dst.u3;
+		nf_nat_setup_info(ct, &range, NF_NAT_MANIP_SRC);
+	}
+}
+
+static unsigned int nf_nat_sip_expect(struct sk_buff *skb, unsigned int protoff,
+				      unsigned int dataoff,
+				      const char **dptr, unsigned int *datalen,
+				      struct nf_conntrack_expect *exp,
+				      unsigned int matchoff,
+				      unsigned int matchlen)
+{
+	enum ip_conntrack_info ctinfo;
+	struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
+	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+	union nf_inet_addr newaddr;
+	u_int16_t port;
+	char buffer[INET6_ADDRSTRLEN + sizeof("[]:nnnnn")];
+	unsigned int buflen;
+
+	/* Connection will come from reply */
+	if (nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.src.u3,
+			     &ct->tuplehash[!dir].tuple.dst.u3))
+		newaddr = exp->tuple.dst.u3;
+	else
+		newaddr = ct->tuplehash[!dir].tuple.dst.u3;
+
+	/* If the signalling port matches the connection's source port in the
+	 * original direction, try to use the destination port in the opposite
+	 * direction. */
+	if (exp->tuple.dst.u.udp.port ==
+	    ct->tuplehash[dir].tuple.src.u.udp.port)
+		port = ntohs(ct->tuplehash[!dir].tuple.dst.u.udp.port);
+	else
+		port = ntohs(exp->tuple.dst.u.udp.port);
+
+	exp->saved_addr = exp->tuple.dst.u3;
+	exp->tuple.dst.u3 = newaddr;
+	exp->saved_proto.udp.port = exp->tuple.dst.u.udp.port;
+	exp->dir = !dir;
+	exp->expectfn = nf_nat_sip_expected;
+
+	for (; port != 0; port++) {
+		int ret;
+
+		exp->tuple.dst.u.udp.port = htons(port);
+		ret = nf_ct_expect_related(exp);
+		if (ret == 0)
+			break;
+		else if (ret != -EBUSY) {
+			port = 0;
+			break;
+		}
+	}
+
+	if (port == 0)
+		return NF_DROP;
+
+	if (!nf_inet_addr_cmp(&exp->tuple.dst.u3, &exp->saved_addr) ||
+	    exp->tuple.dst.u.udp.port != exp->saved_proto.udp.port) {
+		buflen = sip_sprintf_addr_port(ct, buffer, &newaddr, port);
+		if (!mangle_packet(skb, protoff, dataoff, dptr, datalen,
+				   matchoff, matchlen, buffer, buflen))
+			goto err;
+	}
+	return NF_ACCEPT;
+
+err:
+	nf_ct_unexpect_related(exp);
+	return NF_DROP;
+}
+
+static int mangle_content_len(struct sk_buff *skb, unsigned int protoff,
+			      unsigned int dataoff,
+			      const char **dptr, unsigned int *datalen)
+{
+	enum ip_conntrack_info ctinfo;
+	struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
+	unsigned int matchoff, matchlen;
+	char buffer[sizeof("65536")];
+	int buflen, c_len;
+
+	/* Get actual SDP length */
+	if (ct_sip_get_sdp_header(ct, *dptr, 0, *datalen,
+				  SDP_HDR_VERSION, SDP_HDR_UNSPEC,
+				  &matchoff, &matchlen) <= 0)
+		return 0;
+	c_len = *datalen - matchoff + strlen("v=");
+
+	/* Now, update SDP length */
+	if (ct_sip_get_header(ct, *dptr, 0, *datalen, SIP_HDR_CONTENT_LENGTH,
+			      &matchoff, &matchlen) <= 0)
+		return 0;
+
+	buflen = sprintf(buffer, "%u", c_len);
+	return mangle_packet(skb, protoff, dataoff, dptr, datalen,
+			     matchoff, matchlen, buffer, buflen);
+}
+
+static int mangle_sdp_packet(struct sk_buff *skb, unsigned int protoff,
+			     unsigned int dataoff,
+			     const char **dptr, unsigned int *datalen,
+			     unsigned int sdpoff,
+			     enum sdp_header_types type,
+			     enum sdp_header_types term,
+			     char *buffer, int buflen)
+{
+	enum ip_conntrack_info ctinfo;
+	struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
+	unsigned int matchlen, matchoff;
+
+	if (ct_sip_get_sdp_header(ct, *dptr, sdpoff, *datalen, type, term,
+				  &matchoff, &matchlen) <= 0)
+		return -ENOENT;
+	return mangle_packet(skb, protoff, dataoff, dptr, datalen,
+			     matchoff, matchlen, buffer, buflen) ? 0 : -EINVAL;
+}
+
+static unsigned int nf_nat_sdp_addr(struct sk_buff *skb, unsigned int protoff,
+				    unsigned int dataoff,
+				    const char **dptr, unsigned int *datalen,
+				    unsigned int sdpoff,
+				    enum sdp_header_types type,
+				    enum sdp_header_types term,
+				    const union nf_inet_addr *addr)
+{
+	enum ip_conntrack_info ctinfo;
+	struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
+	char buffer[INET6_ADDRSTRLEN];
+	unsigned int buflen;
+
+	buflen = sip_sprintf_addr(ct, buffer, addr, false);
+	if (mangle_sdp_packet(skb, protoff, dataoff, dptr, datalen,
+			      sdpoff, type, term, buffer, buflen))
+		return 0;
+
+	return mangle_content_len(skb, protoff, dataoff, dptr, datalen);
+}
+
+static unsigned int nf_nat_sdp_port(struct sk_buff *skb, unsigned int protoff,
+				    unsigned int dataoff,
+				    const char **dptr, unsigned int *datalen,
+				    unsigned int matchoff,
+				    unsigned int matchlen,
+				    u_int16_t port)
+{
+	char buffer[sizeof("nnnnn")];
+	unsigned int buflen;
+
+	buflen = sprintf(buffer, "%u", port);
+	if (!mangle_packet(skb, protoff, dataoff, dptr, datalen,
+			   matchoff, matchlen, buffer, buflen))
+		return 0;
+
+	return mangle_content_len(skb, protoff, dataoff, dptr, datalen);
+}
+
+static unsigned int nf_nat_sdp_session(struct sk_buff *skb, unsigned int protoff,
+				       unsigned int dataoff,
+				       const char **dptr, unsigned int *datalen,
+				       unsigned int sdpoff,
+				       const union nf_inet_addr *addr)
+{
+	enum ip_conntrack_info ctinfo;
+	struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
+	char buffer[INET6_ADDRSTRLEN];
+	unsigned int buflen;
+
+	/* Mangle session description owner and contact addresses */
+	buflen = sip_sprintf_addr(ct, buffer, addr, false);
+	if (mangle_sdp_packet(skb, protoff, dataoff, dptr, datalen, sdpoff,
+			      SDP_HDR_OWNER, SDP_HDR_MEDIA, buffer, buflen))
+		return 0;
+
+	switch (mangle_sdp_packet(skb, protoff, dataoff, dptr, datalen, sdpoff,
+				  SDP_HDR_CONNECTION, SDP_HDR_MEDIA,
+				  buffer, buflen)) {
+	case 0:
+	/*
+	 * RFC 2327:
+	 *
+	 * Session description
+	 *
+	 * c=* (connection information - not required if included in all media)
+	 */
+	case -ENOENT:
+		break;
+	default:
+		return 0;
+	}
+
+	return mangle_content_len(skb, protoff, dataoff, dptr, datalen);
+}
+
+/* So, this packet has hit the connection tracking matching code.
+   Mangle it, and change the expectation to match the new version. */
+static unsigned int nf_nat_sdp_media(struct sk_buff *skb, unsigned int protoff,
+				     unsigned int dataoff,
+				     const char **dptr, unsigned int *datalen,
+				     struct nf_conntrack_expect *rtp_exp,
+				     struct nf_conntrack_expect *rtcp_exp,
+				     unsigned int mediaoff,
+				     unsigned int medialen,
+				     union nf_inet_addr *rtp_addr)
+{
+	enum ip_conntrack_info ctinfo;
+	struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
+	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+	u_int16_t port;
+
+	/* Connection will come from reply */
+	if (nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.src.u3,
+			     &ct->tuplehash[!dir].tuple.dst.u3))
+		*rtp_addr = rtp_exp->tuple.dst.u3;
+	else
+		*rtp_addr = ct->tuplehash[!dir].tuple.dst.u3;
+
+	rtp_exp->saved_addr = rtp_exp->tuple.dst.u3;
+	rtp_exp->tuple.dst.u3 = *rtp_addr;
+	rtp_exp->saved_proto.udp.port = rtp_exp->tuple.dst.u.udp.port;
+	rtp_exp->dir = !dir;
+	rtp_exp->expectfn = nf_nat_sip_expected;
+
+	rtcp_exp->saved_addr = rtcp_exp->tuple.dst.u3;
+	rtcp_exp->tuple.dst.u3 = *rtp_addr;
+	rtcp_exp->saved_proto.udp.port = rtcp_exp->tuple.dst.u.udp.port;
+	rtcp_exp->dir = !dir;
+	rtcp_exp->expectfn = nf_nat_sip_expected;
+
+	/* Try to get same pair of ports: if not, try to change them. */
+	for (port = ntohs(rtp_exp->tuple.dst.u.udp.port);
+	     port != 0; port += 2) {
+		int ret;
+
+		rtp_exp->tuple.dst.u.udp.port = htons(port);
+		ret = nf_ct_expect_related(rtp_exp);
+		if (ret == -EBUSY)
+			continue;
+		else if (ret < 0) {
+			port = 0;
+			break;
+		}
+		rtcp_exp->tuple.dst.u.udp.port = htons(port + 1);
+		ret = nf_ct_expect_related(rtcp_exp);
+		if (ret == 0)
+			break;
+		else if (ret != -EBUSY) {
+			nf_ct_unexpect_related(rtp_exp);
+			port = 0;
+			break;
+		}
+	}
+
+	if (port == 0)
+		goto err1;
+
+	/* Update media port. */
+	if (rtp_exp->tuple.dst.u.udp.port != rtp_exp->saved_proto.udp.port &&
+	    !nf_nat_sdp_port(skb, protoff, dataoff, dptr, datalen,
+			     mediaoff, medialen, port))
+		goto err2;
+
+	return NF_ACCEPT;
+
+err2:
+	nf_ct_unexpect_related(rtp_exp);
+	nf_ct_unexpect_related(rtcp_exp);
+err1:
+	return NF_DROP;
+}
+
+static struct nf_ct_helper_expectfn sip_nat = {
+	.name		= "sip",
+	.expectfn	= nf_nat_sip_expected,
+};
+
+static void __exit nf_nat_sip_fini(void)
+{
+	RCU_INIT_POINTER(nf_nat_sip_hook, NULL);
+	RCU_INIT_POINTER(nf_nat_sip_seq_adjust_hook, NULL);
+	RCU_INIT_POINTER(nf_nat_sip_expect_hook, NULL);
+	RCU_INIT_POINTER(nf_nat_sdp_addr_hook, NULL);
+	RCU_INIT_POINTER(nf_nat_sdp_port_hook, NULL);
+	RCU_INIT_POINTER(nf_nat_sdp_session_hook, NULL);
+	RCU_INIT_POINTER(nf_nat_sdp_media_hook, NULL);
+	nf_ct_helper_expectfn_unregister(&sip_nat);
+	synchronize_rcu();
+}
+
+static int __init nf_nat_sip_init(void)
+{
+	BUG_ON(nf_nat_sip_hook != NULL);
+	BUG_ON(nf_nat_sip_seq_adjust_hook != NULL);
+	BUG_ON(nf_nat_sip_expect_hook != NULL);
+	BUG_ON(nf_nat_sdp_addr_hook != NULL);
+	BUG_ON(nf_nat_sdp_port_hook != NULL);
+	BUG_ON(nf_nat_sdp_session_hook != NULL);
+	BUG_ON(nf_nat_sdp_media_hook != NULL);
+	RCU_INIT_POINTER(nf_nat_sip_hook, nf_nat_sip);
+	RCU_INIT_POINTER(nf_nat_sip_seq_adjust_hook, nf_nat_sip_seq_adjust);
+	RCU_INIT_POINTER(nf_nat_sip_expect_hook, nf_nat_sip_expect);
+	RCU_INIT_POINTER(nf_nat_sdp_addr_hook, nf_nat_sdp_addr);
+	RCU_INIT_POINTER(nf_nat_sdp_port_hook, nf_nat_sdp_port);
+	RCU_INIT_POINTER(nf_nat_sdp_session_hook, nf_nat_sdp_session);
+	RCU_INIT_POINTER(nf_nat_sdp_media_hook, nf_nat_sdp_media);
+	nf_ct_helper_expectfn_register(&sip_nat);
+	return 0;
+}
+
+module_init(nf_nat_sip_init);
+module_exit(nf_nat_sip_fini);
-- 
cgit v1.2.3


From 8a91bb0c304b0853f8c59b1b48c7822c52362cba Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Sun, 26 Aug 2012 19:14:31 +0200
Subject: netfilter: ip6tables: add stateless IPv6-to-IPv6 Network Prefix
 Translation target

Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/linux/netfilter_ipv6/Kbuild     |   1 +
 include/linux/netfilter_ipv6/ip6t_NPT.h |  16 ++++
 net/ipv6/netfilter/Kconfig              |   9 ++
 net/ipv6/netfilter/Makefile             |   1 +
 net/ipv6/netfilter/ip6t_NPT.c           | 165 ++++++++++++++++++++++++++++++++
 5 files changed, 192 insertions(+)
 create mode 100644 include/linux/netfilter_ipv6/ip6t_NPT.h
 create mode 100644 net/ipv6/netfilter/ip6t_NPT.c

(limited to 'include')

diff --git a/include/linux/netfilter_ipv6/Kbuild b/include/linux/netfilter_ipv6/Kbuild
index bd095bc075e9..b88c0058bf73 100644
--- a/include/linux/netfilter_ipv6/Kbuild
+++ b/include/linux/netfilter_ipv6/Kbuild
@@ -1,6 +1,7 @@
 header-y += ip6_tables.h
 header-y += ip6t_HL.h
 header-y += ip6t_LOG.h
+header-y += ip6t_NPT.h
 header-y += ip6t_REJECT.h
 header-y += ip6t_ah.h
 header-y += ip6t_frag.h
diff --git a/include/linux/netfilter_ipv6/ip6t_NPT.h b/include/linux/netfilter_ipv6/ip6t_NPT.h
new file mode 100644
index 000000000000..f763355481b5
--- /dev/null
+++ b/include/linux/netfilter_ipv6/ip6t_NPT.h
@@ -0,0 +1,16 @@
+#ifndef __NETFILTER_IP6T_NPT
+#define __NETFILTER_IP6T_NPT
+
+#include <linux/types.h>
+#include <linux/netfilter.h>
+
+struct ip6t_npt_tginfo {
+	union nf_inet_addr	src_pfx;
+	union nf_inet_addr	dst_pfx;
+	__u8			src_pfx_len;
+	__u8			dst_pfx_len;
+	/* Used internally by the kernel */
+	__sum16			adjustment;
+};
+
+#endif /* __NETFILTER_IP6T_NPT */
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index 7bdf73be9a99..3b73254d7bf1 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -177,6 +177,15 @@ config IP6_NF_TARGET_REDIRECT
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
+config IP6_NF_TARGET_NPT
+	tristate "NPT (Network Prefix translation) target support"
+	depends on NETFILTER_ADVANCED
+	help
+	  This option adds the `SNPT' and `DNPT' target, which perform
+	  stateless IPv6-to-IPv6 Network Prefix Translation per RFC 6296.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
 config IP6_NF_FILTER
 	tristate "Packet filtering"
 	default m if NETFILTER_ADVANCED=n
diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile
index 0864ce601651..5752132ca159 100644
--- a/net/ipv6/netfilter/Makefile
+++ b/net/ipv6/netfilter/Makefile
@@ -36,5 +36,6 @@ obj-$(CONFIG_IP6_NF_MATCH_RT) += ip6t_rt.o
 # targets
 obj-$(CONFIG_IP6_NF_TARGET_MASQUERADE) += ip6t_MASQUERADE.o
 obj-$(CONFIG_IP6_NF_TARGET_NETMAP) += ip6t_NETMAP.o
+obj-$(CONFIG_IP6_NF_TARGET_NPT) += ip6t_NPT.o
 obj-$(CONFIG_IP6_NF_TARGET_REDIRECT) += ip6t_REDIRECT.o
 obj-$(CONFIG_IP6_NF_TARGET_REJECT) += ip6t_REJECT.o
diff --git a/net/ipv6/netfilter/ip6t_NPT.c b/net/ipv6/netfilter/ip6t_NPT.c
new file mode 100644
index 000000000000..e9486915eff6
--- /dev/null
+++ b/net/ipv6/netfilter/ip6t_NPT.c
@@ -0,0 +1,165 @@
+/*
+ * Copyright (c) 2011, 2012 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/ipv6.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv6.h>
+#include <linux/netfilter_ipv6/ip6t_NPT.h>
+#include <linux/netfilter/x_tables.h>
+
+static __sum16 csum16_complement(__sum16 a)
+{
+	return (__force __sum16)(0xffff - (__force u16)a);
+}
+
+static __sum16 csum16_add(__sum16 a, __sum16 b)
+{
+	u16 sum;
+
+	sum = (__force u16)a + (__force u16)b;
+	sum += (__force u16)a < (__force u16)b;
+	return (__force __sum16)sum;
+}
+
+static __sum16 csum16_sub(__sum16 a, __sum16 b)
+{
+	return csum16_add(a, csum16_complement(b));
+}
+
+static int ip6t_npt_checkentry(const struct xt_tgchk_param *par)
+{
+	struct ip6t_npt_tginfo *npt = par->targinfo;
+	__sum16 src_sum = 0, dst_sum = 0;
+	unsigned int i;
+
+	if (npt->src_pfx_len > 64 || npt->dst_pfx_len > 64)
+		return -EINVAL;
+
+	for (i = 0; i < ARRAY_SIZE(npt->src_pfx.in6.s6_addr16); i++) {
+		src_sum = csum16_add(src_sum,
+				(__force __sum16)npt->src_pfx.in6.s6_addr16[i]);
+		dst_sum = csum16_add(dst_sum,
+				(__force __sum16)npt->dst_pfx.in6.s6_addr16[i]);
+	}
+
+	npt->adjustment = csum16_sub(src_sum, dst_sum);
+	return 0;
+}
+
+static bool ip6t_npt_map_pfx(const struct ip6t_npt_tginfo *npt,
+			     struct in6_addr *addr)
+{
+	unsigned int pfx_len;
+	unsigned int i, idx;
+	__be32 mask;
+	__sum16 sum;
+
+	pfx_len = max(npt->src_pfx_len, npt->dst_pfx_len);
+	for (i = 0; i < pfx_len; i += 32) {
+		if (pfx_len - i >= 32)
+			mask = 0;
+		else
+			mask = htonl(~((1 << (pfx_len - i)) - 1));
+
+		idx = i / 32;
+		addr->s6_addr32[idx] &= mask;
+		addr->s6_addr32[idx] |= npt->dst_pfx.in6.s6_addr32[idx];
+	}
+
+	if (pfx_len <= 48)
+		idx = 3;
+	else {
+		for (idx = 4; idx < ARRAY_SIZE(addr->s6_addr16); idx++) {
+			if ((__force __sum16)addr->s6_addr16[idx] !=
+			    CSUM_MANGLED_0)
+				break;
+		}
+		if (idx == ARRAY_SIZE(addr->s6_addr16))
+			return false;
+	}
+
+	sum = csum16_add((__force __sum16)addr->s6_addr16[idx],
+			 npt->adjustment);
+	if (sum == CSUM_MANGLED_0)
+		sum = 0;
+	*(__force __sum16 *)&addr->s6_addr16[idx] = sum;
+
+	return true;
+}
+
+static unsigned int
+ip6t_snpt_tg(struct sk_buff *skb, const struct xt_action_param *par)
+{
+	const struct ip6t_npt_tginfo *npt = par->targinfo;
+
+	if (!ip6t_npt_map_pfx(npt, &ipv6_hdr(skb)->saddr)) {
+		icmpv6_send(skb, ICMPV6_PARAMPROB, ICMPV6_HDR_FIELD,
+			    offsetof(struct ipv6hdr, saddr));
+		return NF_DROP;
+	}
+	return XT_CONTINUE;
+}
+
+static unsigned int
+ip6t_dnpt_tg(struct sk_buff *skb, const struct xt_action_param *par)
+{
+	const struct ip6t_npt_tginfo *npt = par->targinfo;
+
+	if (!ip6t_npt_map_pfx(npt, &ipv6_hdr(skb)->daddr)) {
+		icmpv6_send(skb, ICMPV6_PARAMPROB, ICMPV6_HDR_FIELD,
+			    offsetof(struct ipv6hdr, daddr));
+		return NF_DROP;
+	}
+	return XT_CONTINUE;
+}
+
+static struct xt_target ip6t_npt_target_reg[] __read_mostly = {
+	{
+		.name		= "SNPT",
+		.target		= ip6t_snpt_tg,
+		.targetsize	= sizeof(struct ip6t_npt_tginfo),
+		.checkentry	= ip6t_npt_checkentry,
+		.family		= NFPROTO_IPV6,
+		.hooks		= (1 << NF_INET_LOCAL_IN) |
+				  (1 << NF_INET_POST_ROUTING),
+		.me		= THIS_MODULE,
+	},
+	{
+		.name		= "DNPT",
+		.target		= ip6t_dnpt_tg,
+		.targetsize	= sizeof(struct ip6t_npt_tginfo),
+		.checkentry	= ip6t_npt_checkentry,
+		.family		= NFPROTO_IPV6,
+		.hooks		= (1 << NF_INET_PRE_ROUTING) |
+				  (1 << NF_INET_LOCAL_OUT),
+		.me		= THIS_MODULE,
+	},
+};
+
+static int __init ip6t_npt_init(void)
+{
+	return xt_register_targets(ip6t_npt_target_reg,
+				   ARRAY_SIZE(ip6t_npt_target_reg));
+}
+
+static void __exit ip6t_npt_exit(void)
+{
+	xt_unregister_targets(ip6t_npt_target_reg,
+			      ARRAY_SIZE(ip6t_npt_target_reg));
+}
+
+module_init(ip6t_npt_init);
+module_exit(ip6t_npt_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("IPv6-to-IPv6 Network Prefix Translation (RFC 6296)");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS("ip6t_SNPT");
+MODULE_ALIAS("ip6t_DNPT");
-- 
cgit v1.2.3


From f9dc9ac51610a35629c8211b6b8c9b0c65cf0e1d Mon Sep 17 00:00:00 2001
From: Srinivas Kandagatla <srinivas.kandagatla@st.com>
Date: Fri, 24 Aug 2012 01:58:59 +0000
Subject: of/mdio: Add dummy functions in of_mdio.h.

This patch adds dummy functions in of_mdio.h, so that driver need not
ifdef there code with CONFIG_OF.

Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla@st.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/of_mdio.h | 33 +++++++++++++++++++++++++++++++++
 1 file changed, 33 insertions(+)

(limited to 'include')

diff --git a/include/linux/of_mdio.h b/include/linux/of_mdio.h
index 912c27a0f7ee..6ef49b803efb 100644
--- a/include/linux/of_mdio.h
+++ b/include/linux/of_mdio.h
@@ -12,6 +12,7 @@
 #include <linux/phy.h>
 #include <linux/of.h>
 
+#ifdef CONFIG_OF
 extern int of_mdiobus_register(struct mii_bus *mdio, struct device_node *np);
 extern struct phy_device *of_phy_find_device(struct device_node *phy_np);
 extern struct phy_device *of_phy_connect(struct net_device *dev,
@@ -24,4 +25,36 @@ extern struct phy_device *of_phy_connect_fixed_link(struct net_device *dev,
 
 extern struct mii_bus *of_mdio_find_bus(struct device_node *mdio_np);
 
+#else /* CONFIG_OF */
+int of_mdiobus_register(struct mii_bus *mdio, struct device_node *np)
+{
+	return -ENOSYS;
+}
+
+struct phy_device *of_phy_find_device(struct device_node *phy_np)
+{
+	return NULL;
+}
+
+struct phy_device *of_phy_connect(struct net_device *dev,
+					 struct device_node *phy_np,
+					 void (*hndlr)(struct net_device *),
+					 u32 flags, phy_interface_t iface)
+{
+	return NULL;
+}
+
+struct phy_device *of_phy_connect_fixed_link(struct net_device *dev,
+					 void (*hndlr)(struct net_device *),
+					 phy_interface_t iface)
+{
+	return NULL;
+}
+
+struct mii_bus *of_mdio_find_bus(struct device_node *mdio_np)
+{
+	return NULL;
+}
+#endif /* CONFIG_OF */
+
 #endif /* __LINUX_OF_MDIO_H */
-- 
cgit v1.2.3


From 6c9ff979d1921e9fd05d89e1383121c2503759b9 Mon Sep 17 00:00:00 2001
From: Alex Bergmann <alex@linlab.net>
Date: Fri, 31 Aug 2012 02:48:31 +0000
Subject: tcp: Increase timeout for SYN segments

Commit 9ad7c049 ("tcp: RFC2988bis + taking RTT sample from 3WHS for
the passive open side") changed the initRTO from 3secs to 1sec in
accordance to RFC6298 (former RFC2988bis). This reduced the time till
the last SYN retransmission packet gets sent from 93secs to 31secs.

RFC1122 is stating that the retransmission should be done for at least 3
minutes, but this seems to be quite high.

  "However, the values of R1 and R2 may be different for SYN
  and data segments.  In particular, R2 for a SYN segment MUST
  be set large enough to provide retransmission of the segment
  for at least 3 minutes.  The application can close the
  connection (i.e., give up on the open attempt) sooner, of
  course."

This patch increases the value of TCP_SYN_RETRIES to the value of 6,
providing a retransmission window of 63secs.

The comments for SYN and SYNACK retries have also been updated to
describe the current settings. The same goes for the documentation file
"Documentation/networking/ip-sysctl.txt".

Signed-off-by: Alexander Bergmann <alex@linlab.net>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/ip-sysctl.txt |  8 ++++++--
 include/net/tcp.h                      | 18 ++++++++++++++----
 2 files changed, 20 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index ca447b35b833..d64e53124b8c 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -439,7 +439,9 @@ tcp_stdurg - BOOLEAN
 tcp_synack_retries - INTEGER
 	Number of times SYNACKs for a passive TCP connection attempt will
 	be retransmitted. Should not be higher than 255. Default value
-	is 5, which corresponds to ~180seconds.
+	is 5, which corresponds to 31seconds till the last retransmission
+	with the current initial RTO of 1second. With this the final timeout
+	for a passive TCP connection will happen after 63seconds.
 
 tcp_syncookies - BOOLEAN
 	Only valid when the kernel was compiled with CONFIG_SYNCOOKIES
@@ -478,7 +480,9 @@ tcp_fastopen - INTEGER
 tcp_syn_retries - INTEGER
 	Number of times initial SYNs for an active TCP connection attempt
 	will be retransmitted. Should not be higher than 255. Default value
-	is 5, which corresponds to ~180seconds.
+	is 6, which corresponds to 63seconds till the last restransmission
+	with the current initial RTO of 1second. With this the final timeout
+	for an active TCP connection attempt will happen after 127seconds.
 
 tcp_timestamps - BOOLEAN
 	Enable timestamps as defined in RFC1323.
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 9a0021d16d91..0fca06f16463 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -98,11 +98,21 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo);
 				 * 15 is ~13-30min depending on RTO.
 				 */
 
-#define TCP_SYN_RETRIES	 5	/* number of times to retry active opening a
-				 * connection: ~180sec is RFC minimum	*/
+#define TCP_SYN_RETRIES	 6	/* This is how many retries are done
+				 * when active opening a connection.
+				 * RFC1122 says the minimum retry MUST
+				 * be at least 180secs.  Nevertheless
+				 * this value is corresponding to
+				 * 63secs of retransmission with the
+				 * current initial RTO.
+				 */
 
-#define TCP_SYNACK_RETRIES 5	/* number of times to retry passive opening a
-				 * connection: ~180sec is RFC minimum	*/
+#define TCP_SYNACK_RETRIES 5	/* This is how may retries are done
+				 * when passive opening a connection.
+				 * This is corresponding to 31secs of
+				 * retransmission with the current
+				 * initial RTO.
+				 */
 
 #define TCP_TIMEWAIT_LEN (60*HZ) /* how long to wait to destroy TIME-WAIT
 				  * state, about 60 seconds	*/
-- 
cgit v1.2.3


From d56631a66c0d0c9d662abfb38cd1f6326eeebd7c Mon Sep 17 00:00:00 2001
From: Srinivas Kandagatla <srinivas.kandagatla@st.com>
Date: Thu, 30 Aug 2012 05:50:43 +0000
Subject: net:stmmac: Remove bus_id from mdio platform data.

This patch removes bus_id from mdio platform data, The reason to remove
bus_id is, stmmac mdio bus_id is always same as stmmac bus-id, so there
is no point in passing this in different variable.
Also stmmac ethernet driver connects to phy with bus_id passed its
platform data.
So, having single bus-id is much simpler.

Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla@st.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/stmmac.txt               | 5 -----
 drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c | 8 +++-----
 drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c  | 1 -
 include/linux/stmmac.h                            | 1 -
 4 files changed, 3 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/Documentation/networking/stmmac.txt b/Documentation/networking/stmmac.txt
index c676b9cedbd0..ef9ee71b4d7f 100644
--- a/Documentation/networking/stmmac.txt
+++ b/Documentation/networking/stmmac.txt
@@ -173,7 +173,6 @@ Where:
 For MDIO bus The we have:
 
  struct stmmac_mdio_bus_data {
-	int bus_id;
 	int (*phy_reset)(void *priv);
 	unsigned int phy_mask;
 	int *irqs;
@@ -181,7 +180,6 @@ For MDIO bus The we have:
  };
 
 Where:
- o bus_id: bus identifier;
  o phy_reset: hook to reset the phy device attached to the bus.
  o phy_mask: phy mask passed when register the MDIO bus within the driver.
  o irqs: list of IRQs, one per PHY.
@@ -230,9 +228,6 @@ there are two MAC cores: one MAC is for MDIO Bus/PHY emulation
 with fixed_link support.
 
 static struct stmmac_mdio_bus_data stmmac1_mdio_bus = {
-	.bus_id = 1,
-		|
-		|-> phy device on the bus_id 1
 	.phy_reset = phy_reset;
 		|
 		|-> function to provide the phy_reset on this board
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c
index e1f3d04a8c90..0376a5e6b2bf 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c
@@ -177,7 +177,7 @@ int stmmac_mdio_register(struct net_device *ndev)
 	new_bus->write = &stmmac_mdio_write;
 	new_bus->reset = &stmmac_mdio_reset;
 	snprintf(new_bus->id, MII_BUS_ID_SIZE, "%s-%x",
-		new_bus->name, mdio_bus_data->bus_id);
+		new_bus->name, priv->plat->bus_id);
 	new_bus->priv = ndev;
 	new_bus->irq = irqlist;
 	new_bus->phy_mask = mdio_bus_data->phy_mask;
@@ -213,12 +213,10 @@ int stmmac_mdio_register(struct net_device *ndev)
 			 * and no PHY number was provided to the MAC,
 			 * use the one probed here.
 			 */
-			if ((priv->plat->bus_id == mdio_bus_data->bus_id) &&
-			    (priv->plat->phy_addr == -1))
+			if (priv->plat->phy_addr == -1)
 				priv->plat->phy_addr = addr;
 
-			act = (priv->plat->bus_id == mdio_bus_data->bus_id) &&
-				(priv->plat->phy_addr == addr);
+			act = (priv->plat->phy_addr == addr);
 			switch (phydev->irq) {
 			case PHY_POLL:
 				irq_str = "POLL";
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c
index 13afb8edfadc..1f069b0f6af5 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c
@@ -40,7 +40,6 @@ static void stmmac_default_data(void)
 	plat_dat.has_gmac = 1;
 	plat_dat.force_sf_dma_mode = 1;
 
-	mdio_data.bus_id = 1;
 	mdio_data.phy_reset = NULL;
 	mdio_data.phy_mask = 0;
 	plat_dat.mdio_bus_data = &mdio_data;
diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h
index b69bdb1e08b6..a1547ea3920d 100644
--- a/include/linux/stmmac.h
+++ b/include/linux/stmmac.h
@@ -76,7 +76,6 @@
 /* Platfrom data for platform device structure's platform_data field */
 
 struct stmmac_mdio_bus_data {
-	int bus_id;
 	int (*phy_reset)(void *priv);
 	unsigned int phy_mask;
 	int *irqs;
-- 
cgit v1.2.3


From 1046716368979dee857a2b8a91c4a8833f21b9cb Mon Sep 17 00:00:00 2001
From: Jerry Chu <hkchu@google.com>
Date: Fri, 31 Aug 2012 12:29:11 +0000
Subject: tcp: TCP Fast Open Server - header & support functions

This patch adds all the necessary data structure and support
functions to implement TFO server side. It also documents a number
of flags for the sysctl_tcp_fastopen knob, and adds a few Linux
extension MIBs.

In addition, it includes the following:

1. a new TCP_FASTOPEN socket option an application must call to
supply a max backlog allowed in order to enable TFO on its listener.

2. A number of key data structures:
"fastopen_rsk" in tcp_sock - for a big socket to access its
request_sock for retransmission and ack processing purpose. It is
non-NULL iff 3WHS not completed.

"fastopenq" in request_sock_queue - points to a per Fast Open
listener data structure "fastopen_queue" to keep track of qlen (# of
outstanding Fast Open requests) and max_qlen, among other things.

"listener" in tcp_request_sock - to point to the original listener
for book-keeping purpose, i.e., to maintain qlen against max_qlen
as part of defense against IP spoofing attack.

3. various data structure and functions, many in tcp_fastopen.c, to
support server side Fast Open cookie operations, including
/proc/sys/net/ipv4/tcp_fastopen_key to allow manual rekeying.

Signed-off-by: H.K. Jerry Chu <hkchu@google.com>
Cc: Yuchung Cheng <ycheng@google.com>
Cc: Neal Cardwell <ncardwell@google.com>
Cc: Eric Dumazet <edumazet@google.com>
Cc: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/ip-sysctl.txt | 29 +++++++++---
 include/linux/snmp.h                   |  4 ++
 include/linux/tcp.h                    | 45 ++++++++++++++++--
 include/net/request_sock.h             | 36 +++++++++++++++
 include/net/tcp.h                      | 46 ++++++++++++++++---
 net/ipv4/proc.c                        |  4 ++
 net/ipv4/sysctl_net_ipv4.c             | 45 ++++++++++++++++++
 net/ipv4/tcp_fastopen.c                | 83 +++++++++++++++++++++++++++++++++-
 net/ipv4/tcp_input.c                   |  4 +-
 9 files changed, 276 insertions(+), 20 deletions(-)

(limited to 'include')

diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index d64e53124b8c..c7fc10724948 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -467,16 +467,31 @@ tcp_syncookies - BOOLEAN
 tcp_fastopen - INTEGER
 	Enable TCP Fast Open feature (draft-ietf-tcpm-fastopen) to send data
 	in the opening SYN packet. To use this feature, the client application
-	must not use connect(). Instead, it should use sendmsg() or sendto()
-	with MSG_FASTOPEN flag which performs a TCP handshake automatically.
-
-	The values (bitmap) are:
-	1: Enables sending data in the opening SYN on the client
-	5: Enables sending data in the opening SYN on the client regardless
-	   of cookie availability.
+	must use sendmsg() or sendto() with MSG_FASTOPEN flag rather than
+	connect() to perform a TCP handshake automatically.
+
+	The values (bitmap) are
+	1: Enables sending data in the opening SYN on the client.
+	2: Enables TCP Fast Open on the server side, i.e., allowing data in
+	   a SYN packet to be accepted and passed to the application before
+	   3-way hand shake finishes.
+	4: Send data in the opening SYN regardless of cookie availability and
+	   without a cookie option.
+	0x100: Accept SYN data w/o validating the cookie.
+	0x200: Accept data-in-SYN w/o any cookie option present.
+	0x400/0x800: Enable Fast Open on all listeners regardless of the
+	   TCP_FASTOPEN socket option. The two different flags designate two
+	   different ways of setting max_qlen without the TCP_FASTOPEN socket
+	   option.
 
 	Default: 0
 
+	Note that the client & server side Fast Open flags (1 and 2
+	respectively) must be also enabled before the rest of flags can take
+	effect.
+
+	See include/net/tcp.h and the code for more details.
+
 tcp_syn_retries - INTEGER
 	Number of times initial SYNs for an active TCP connection attempt
 	will be retransmitted. Should not be higher than 255. Default value
diff --git a/include/linux/snmp.h b/include/linux/snmp.h
index ad6e3a6bf9fb..fdfba235f9f1 100644
--- a/include/linux/snmp.h
+++ b/include/linux/snmp.h
@@ -241,6 +241,10 @@ enum
 	LINUX_MIB_TCPCHALLENGEACK,		/* TCPChallengeACK */
 	LINUX_MIB_TCPSYNCHALLENGE,		/* TCPSYNChallenge */
 	LINUX_MIB_TCPFASTOPENACTIVE,		/* TCPFastOpenActive */
+	LINUX_MIB_TCPFASTOPENPASSIVE,		/* TCPFastOpenPassive*/
+	LINUX_MIB_TCPFASTOPENPASSIVEFAIL,	/* TCPFastOpenPassiveFail */
+	LINUX_MIB_TCPFASTOPENLISTENOVERFLOW,	/* TCPFastOpenListenOverflow */
+	LINUX_MIB_TCPFASTOPENCOOKIEREQD,	/* TCPFastOpenCookieReqd */
 	__LINUX_MIB_MAX
 };
 
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index eb125a4c30b3..ae46df590629 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -110,6 +110,7 @@ enum {
 #define TCP_REPAIR_QUEUE	20
 #define TCP_QUEUE_SEQ		21
 #define TCP_REPAIR_OPTIONS	22
+#define TCP_FASTOPEN		23	/* Enable FastOpen on listeners */
 
 struct tcp_repair_opt {
 	__u32	opt_code;
@@ -246,6 +247,7 @@ static inline unsigned int tcp_optlen(const struct sk_buff *skb)
 /* TCP Fast Open */
 #define TCP_FASTOPEN_COOKIE_MIN	4	/* Min Fast Open Cookie size in bytes */
 #define TCP_FASTOPEN_COOKIE_MAX	16	/* Max Fast Open Cookie size in bytes */
+#define TCP_FASTOPEN_COOKIE_SIZE 8	/* the size employed by this impl. */
 
 /* TCP Fast Open Cookie as stored in memory */
 struct tcp_fastopen_cookie {
@@ -312,9 +314,14 @@ struct tcp_request_sock {
 	/* Only used by TCP MD5 Signature so far. */
 	const struct tcp_request_sock_ops *af_specific;
 #endif
+	struct sock			*listener; /* needed for TFO */
 	u32				rcv_isn;
 	u32				snt_isn;
 	u32				snt_synack; /* synack sent time */
+	u32				rcv_nxt; /* the ack # by SYNACK. For
+						  * FastOpen it's the seq#
+						  * after data-in-SYN.
+						  */
 };
 
 static inline struct tcp_request_sock *tcp_rsk(const struct request_sock *req)
@@ -505,14 +512,18 @@ struct tcp_sock {
 	struct tcp_md5sig_info	__rcu *md5sig_info;
 #endif
 
-/* TCP fastopen related information */
-	struct tcp_fastopen_request *fastopen_req;
-
 	/* When the cookie options are generated and exchanged, then this
 	 * object holds a reference to them (cookie_values->kref).  Also
 	 * contains related tcp_cookie_transactions fields.
 	 */
 	struct tcp_cookie_values  *cookie_values;
+
+/* TCP fastopen related information */
+	struct tcp_fastopen_request *fastopen_req;
+	/* fastopen_rsk points to request_sock that resulted in this big
+	 * socket. Used to retransmit SYNACKs etc.
+	 */
+	struct request_sock *fastopen_rsk;
 };
 
 enum tsq_flags {
@@ -552,6 +563,34 @@ static inline struct tcp_timewait_sock *tcp_twsk(const struct sock *sk)
 	return (struct tcp_timewait_sock *)sk;
 }
 
+static inline bool tcp_passive_fastopen(const struct sock *sk)
+{
+	return (sk->sk_state == TCP_SYN_RECV &&
+		tcp_sk(sk)->fastopen_rsk != NULL);
+}
+
+static inline bool fastopen_cookie_present(struct tcp_fastopen_cookie *foc)
+{
+	return foc->len != -1;
+}
+
+static inline int fastopen_init_queue(struct sock *sk, int backlog)
+{
+	struct request_sock_queue *queue =
+	    &inet_csk(sk)->icsk_accept_queue;
+
+	if (queue->fastopenq == NULL) {
+		queue->fastopenq = kzalloc(
+		    sizeof(struct fastopen_queue),
+		    sk->sk_allocation);
+		if (queue->fastopenq == NULL)
+			return -ENOMEM;
+		spin_lock_init(&queue->fastopenq->lock);
+	}
+	queue->fastopenq->max_qlen = backlog;
+	return 0;
+}
+
 #endif	/* __KERNEL__ */
 
 #endif	/* _LINUX_TCP_H */
diff --git a/include/net/request_sock.h b/include/net/request_sock.h
index 4c0766e201e3..c3cdd6c9f448 100644
--- a/include/net/request_sock.h
+++ b/include/net/request_sock.h
@@ -106,6 +106,34 @@ struct listen_sock {
 	struct request_sock	*syn_table[0];
 };
 
+/*
+ * For a TCP Fast Open listener -
+ *	lock - protects the access to all the reqsk, which is co-owned by
+ *		the listener and the child socket.
+ *	qlen - pending TFO requests (still in TCP_SYN_RECV).
+ *	max_qlen - max TFO reqs allowed before TFO is disabled.
+ *
+ *	XXX (TFO) - ideally these fields can be made as part of "listen_sock"
+ *	structure above. But there is some implementation difficulty due to
+ *	listen_sock being part of request_sock_queue hence will be freed when
+ *	a listener is stopped. But TFO related fields may continue to be
+ *	accessed even after a listener is closed, until its sk_refcnt drops
+ *	to 0 implying no more outstanding TFO reqs. One solution is to keep
+ *	listen_opt around until	sk_refcnt drops to 0. But there is some other
+ *	complexity that needs to be resolved. E.g., a listener can be disabled
+ *	temporarily through shutdown()->tcp_disconnect(), and re-enabled later.
+ */
+struct fastopen_queue {
+	struct request_sock	*rskq_rst_head; /* Keep track of past TFO */
+	struct request_sock	*rskq_rst_tail; /* requests that caused RST.
+						 * This is part of the defense
+						 * against spoofing attack.
+						 */
+	spinlock_t	lock;
+	int		qlen;		/* # of pending (TCP_SYN_RECV) reqs */
+	int		max_qlen;	/* != 0 iff TFO is currently enabled */
+};
+
 /** struct request_sock_queue - queue of request_socks
  *
  * @rskq_accept_head - FIFO head of established children
@@ -129,6 +157,12 @@ struct request_sock_queue {
 	u8			rskq_defer_accept;
 	/* 3 bytes hole, try to pack */
 	struct listen_sock	*listen_opt;
+	struct fastopen_queue	*fastopenq; /* This is non-NULL iff TFO has been
+					     * enabled on this listener. Check
+					     * max_qlen != 0 in fastopen_queue
+					     * to determine if TFO is enabled
+					     * right at this moment.
+					     */
 };
 
 extern int reqsk_queue_alloc(struct request_sock_queue *queue,
@@ -136,6 +170,8 @@ extern int reqsk_queue_alloc(struct request_sock_queue *queue,
 
 extern void __reqsk_queue_destroy(struct request_sock_queue *queue);
 extern void reqsk_queue_destroy(struct request_sock_queue *queue);
+extern void reqsk_fastopen_remove(struct sock *sk,
+				  struct request_sock *req, bool reset);
 
 static inline struct request_sock *
 	reqsk_queue_yank_acceptq(struct request_sock_queue *queue)
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 0fca06f16463..9f8821e3293a 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -224,8 +224,24 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo);
 
 /* Bit Flags for sysctl_tcp_fastopen */
 #define	TFO_CLIENT_ENABLE	1
+#define	TFO_SERVER_ENABLE	2
 #define	TFO_CLIENT_NO_COOKIE	4	/* Data in SYN w/o cookie option */
 
+/* Process SYN data but skip cookie validation */
+#define	TFO_SERVER_COOKIE_NOT_CHKED	0x100
+/* Accept SYN data w/o any cookie option */
+#define	TFO_SERVER_COOKIE_NOT_REQD	0x200
+
+/* Force enable TFO on all listeners, i.e., not requiring the
+ * TCP_FASTOPEN socket option. SOCKOPT1/2 determine how to set max_qlen.
+ */
+#define	TFO_SERVER_WO_SOCKOPT1	0x400
+#define	TFO_SERVER_WO_SOCKOPT2	0x800
+/* Always create TFO child sockets on a TFO listener even when
+ * cookie/data not present. (For testing purpose!)
+ */
+#define	TFO_SERVER_ALWAYS	0x1000
+
 extern struct inet_timewait_death_row tcp_death_row;
 
 /* sysctl variables for tcp */
@@ -421,12 +437,6 @@ extern void tcp_metrics_init(void);
 extern bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst, bool paws_check);
 extern bool tcp_remember_stamp(struct sock *sk);
 extern bool tcp_tw_remember_stamp(struct inet_timewait_sock *tw);
-extern void tcp_fastopen_cache_get(struct sock *sk, u16 *mss,
-				   struct tcp_fastopen_cookie *cookie,
-				   int *syn_loss, unsigned long *last_syn_loss);
-extern void tcp_fastopen_cache_set(struct sock *sk, u16 mss,
-				   struct tcp_fastopen_cookie *cookie,
-				   bool syn_lost);
 extern void tcp_fetch_timewait_stamp(struct sock *sk, struct dst_entry *dst);
 extern void tcp_disable_fack(struct tcp_sock *tp);
 extern void tcp_close(struct sock *sk, long timeout);
@@ -537,6 +547,7 @@ extern void tcp_send_delayed_ack(struct sock *sk);
 extern void tcp_cwnd_application_limited(struct sock *sk);
 extern void tcp_resume_early_retransmit(struct sock *sk);
 extern void tcp_rearm_rto(struct sock *sk);
+extern void tcp_reset(struct sock *sk);
 
 /* tcp_timer.c */
 extern void tcp_init_xmit_timers(struct sock *);
@@ -586,6 +597,7 @@ extern int tcp_mtu_to_mss(struct sock *sk, int pmtu);
 extern int tcp_mss_to_mtu(struct sock *sk, int mss);
 extern void tcp_mtup_init(struct sock *sk);
 extern void tcp_valid_rtt_meas(struct sock *sk, u32 seq_rtt);
+extern void tcp_init_buffer_space(struct sock *sk);
 
 static inline void tcp_bound_rto(const struct sock *sk)
 {
@@ -1104,6 +1116,7 @@ static inline void tcp_openreq_init(struct request_sock *req,
 	req->rcv_wnd = 0;		/* So that tcp_send_synack() knows! */
 	req->cookie_ts = 0;
 	tcp_rsk(req)->rcv_isn = TCP_SKB_CB(skb)->seq;
+	tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
 	req->mss = rx_opt->mss_clamp;
 	req->ts_recent = rx_opt->saw_tstamp ? rx_opt->rcv_tsval : 0;
 	ireq->tstamp_ok = rx_opt->tstamp_ok;
@@ -1308,15 +1321,34 @@ extern int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *, const struct sk_buff
 extern int tcp_md5_hash_key(struct tcp_md5sig_pool *hp,
 			    const struct tcp_md5sig_key *key);
 
+/* From tcp_fastopen.c */
+extern void tcp_fastopen_cache_get(struct sock *sk, u16 *mss,
+				   struct tcp_fastopen_cookie *cookie,
+				   int *syn_loss, unsigned long *last_syn_loss);
+extern void tcp_fastopen_cache_set(struct sock *sk, u16 mss,
+				   struct tcp_fastopen_cookie *cookie,
+				   bool syn_lost);
 struct tcp_fastopen_request {
 	/* Fast Open cookie. Size 0 means a cookie request */
 	struct tcp_fastopen_cookie	cookie;
 	struct msghdr			*data;  /* data in MSG_FASTOPEN */
 	u16				copied;	/* queued in tcp_connect() */
 };
-
 void tcp_free_fastopen_req(struct tcp_sock *tp);
 
+extern struct tcp_fastopen_context __rcu *tcp_fastopen_ctx;
+int tcp_fastopen_reset_cipher(void *key, unsigned int len);
+void tcp_fastopen_cookie_gen(__be32 addr, struct tcp_fastopen_cookie *foc);
+
+#define TCP_FASTOPEN_KEY_LENGTH 16
+
+/* Fastopen key context */
+struct tcp_fastopen_context {
+	struct crypto_cipher __rcu	*tfm;
+	__u8				key[TCP_FASTOPEN_KEY_LENGTH];
+	struct rcu_head			rcu;
+};
+
 /* write queue abstraction */
 static inline void tcp_write_queue_purge(struct sock *sk)
 {
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 957acd12250b..8de53e1ddd54 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -263,6 +263,10 @@ static const struct snmp_mib snmp4_net_list[] = {
 	SNMP_MIB_ITEM("TCPChallengeACK", LINUX_MIB_TCPCHALLENGEACK),
 	SNMP_MIB_ITEM("TCPSYNChallenge", LINUX_MIB_TCPSYNCHALLENGE),
 	SNMP_MIB_ITEM("TCPFastOpenActive", LINUX_MIB_TCPFASTOPENACTIVE),
+	SNMP_MIB_ITEM("TCPFastOpenPassive", LINUX_MIB_TCPFASTOPENPASSIVE),
+	SNMP_MIB_ITEM("TCPFastOpenPassiveFail", LINUX_MIB_TCPFASTOPENPASSIVEFAIL),
+	SNMP_MIB_ITEM("TCPFastOpenListenOverflow", LINUX_MIB_TCPFASTOPENLISTENOVERFLOW),
+	SNMP_MIB_ITEM("TCPFastOpenCookieReqd", LINUX_MIB_TCPFASTOPENCOOKIEREQD),
 	SNMP_MIB_SENTINEL
 };
 
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 3e78c79b5586..9205e492dc9d 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -232,6 +232,45 @@ static int ipv4_tcp_mem(ctl_table *ctl, int write,
 	return 0;
 }
 
+int proc_tcp_fastopen_key(ctl_table *ctl, int write, void __user *buffer,
+			  size_t *lenp, loff_t *ppos)
+{
+	ctl_table tbl = { .maxlen = (TCP_FASTOPEN_KEY_LENGTH * 2 + 10) };
+	struct tcp_fastopen_context *ctxt;
+	int ret;
+	u32  user_key[4]; /* 16 bytes, matching TCP_FASTOPEN_KEY_LENGTH */
+
+	tbl.data = kmalloc(tbl.maxlen, GFP_KERNEL);
+	if (!tbl.data)
+		return -ENOMEM;
+
+	rcu_read_lock();
+	ctxt = rcu_dereference(tcp_fastopen_ctx);
+	if (ctxt)
+		memcpy(user_key, ctxt->key, TCP_FASTOPEN_KEY_LENGTH);
+	rcu_read_unlock();
+
+	snprintf(tbl.data, tbl.maxlen, "%08x-%08x-%08x-%08x",
+		user_key[0], user_key[1], user_key[2], user_key[3]);
+	ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
+
+	if (write && ret == 0) {
+		if (sscanf(tbl.data, "%x-%x-%x-%x", user_key, user_key + 1,
+			   user_key + 2, user_key + 3) != 4) {
+			ret = -EINVAL;
+			goto bad_key;
+		}
+		tcp_fastopen_reset_cipher(user_key, TCP_FASTOPEN_KEY_LENGTH);
+	}
+
+bad_key:
+	pr_debug("proc FO key set 0x%x-%x-%x-%x <- 0x%s: %u\n",
+	       user_key[0], user_key[1], user_key[2], user_key[3],
+	       (char *)tbl.data, ret);
+	kfree(tbl.data);
+	return ret;
+}
+
 static struct ctl_table ipv4_table[] = {
 	{
 		.procname	= "tcp_timestamps",
@@ -385,6 +424,12 @@ static struct ctl_table ipv4_table[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec,
 	},
+	{
+		.procname	= "tcp_fastopen_key",
+		.mode		= 0600,
+		.maxlen		= ((TCP_FASTOPEN_KEY_LENGTH * 2) + 10),
+		.proc_handler	= proc_tcp_fastopen_key,
+	},
 	{
 		.procname	= "tcp_tw_recycle",
 		.data		= &tcp_death_row.sysctl_tw_recycle,
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index a7f729c409d7..8f7ef0ad80e5 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -1,10 +1,91 @@
+#include <linux/err.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/tcp.h>
+#include <linux/rcupdate.h>
+#include <linux/rculist.h>
+#include <net/inetpeer.h>
+#include <net/tcp.h>
 
-int sysctl_tcp_fastopen;
+int sysctl_tcp_fastopen __read_mostly;
+
+struct tcp_fastopen_context __rcu *tcp_fastopen_ctx;
+
+static DEFINE_SPINLOCK(tcp_fastopen_ctx_lock);
+
+static void tcp_fastopen_ctx_free(struct rcu_head *head)
+{
+	struct tcp_fastopen_context *ctx =
+	    container_of(head, struct tcp_fastopen_context, rcu);
+	crypto_free_cipher(ctx->tfm);
+	kfree(ctx);
+}
+
+int tcp_fastopen_reset_cipher(void *key, unsigned int len)
+{
+	int err;
+	struct tcp_fastopen_context *ctx, *octx;
+
+	ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
+	if (!ctx)
+		return -ENOMEM;
+	ctx->tfm = crypto_alloc_cipher("aes", 0, 0);
+
+	if (IS_ERR(ctx->tfm)) {
+		err = PTR_ERR(ctx->tfm);
+error:		kfree(ctx);
+		pr_err("TCP: TFO aes cipher alloc error: %d\n", err);
+		return err;
+	}
+	err = crypto_cipher_setkey(ctx->tfm, key, len);
+	if (err) {
+		pr_err("TCP: TFO cipher key error: %d\n", err);
+		crypto_free_cipher(ctx->tfm);
+		goto error;
+	}
+	memcpy(ctx->key, key, len);
+
+	spin_lock(&tcp_fastopen_ctx_lock);
+
+	octx = rcu_dereference_protected(tcp_fastopen_ctx,
+				lockdep_is_held(&tcp_fastopen_ctx_lock));
+	rcu_assign_pointer(tcp_fastopen_ctx, ctx);
+	spin_unlock(&tcp_fastopen_ctx_lock);
+
+	if (octx)
+		call_rcu(&octx->rcu, tcp_fastopen_ctx_free);
+	return err;
+}
+
+/* Computes the fastopen cookie for the peer.
+ * The peer address is a 128 bits long (pad with zeros for IPv4).
+ *
+ * The caller must check foc->len to determine if a valid cookie
+ * has been generated successfully.
+*/
+void tcp_fastopen_cookie_gen(__be32 addr, struct tcp_fastopen_cookie *foc)
+{
+	__be32 peer_addr[4] = { addr, 0, 0, 0 };
+	struct tcp_fastopen_context *ctx;
+
+	rcu_read_lock();
+	ctx = rcu_dereference(tcp_fastopen_ctx);
+	if (ctx) {
+		crypto_cipher_encrypt_one(ctx->tfm,
+					  foc->val,
+					  (__u8 *)peer_addr);
+		foc->len = TCP_FASTOPEN_COOKIE_SIZE;
+	}
+	rcu_read_unlock();
+}
 
 static int __init tcp_fastopen_init(void)
 {
+	__u8 key[TCP_FASTOPEN_KEY_LENGTH];
+
+	get_random_bytes(key, sizeof(key));
+	tcp_fastopen_reset_cipher(key, sizeof(key));
 	return 0;
 }
 
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index ce4ffe9ed556..d47d5fe8f3f0 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -378,7 +378,7 @@ static void tcp_fixup_rcvbuf(struct sock *sk)
 /* 4. Try to fixup all. It is made immediately after connection enters
  *    established state.
  */
-static void tcp_init_buffer_space(struct sock *sk)
+void tcp_init_buffer_space(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	int maxwin;
@@ -4038,7 +4038,7 @@ static inline bool tcp_sequence(const struct tcp_sock *tp, u32 seq, u32 end_seq)
 }
 
 /* When we get a reset we do this. */
-static void tcp_reset(struct sock *sk)
+void tcp_reset(struct sock *sk)
 {
 	/* We want the right error as BSD sees it (and indeed as we do). */
 	switch (sk->sk_state) {
-- 
cgit v1.2.3


From 8336886f786fdacbc19b719c1f7ea91eb70706d4 Mon Sep 17 00:00:00 2001
From: Jerry Chu <hkchu@google.com>
Date: Fri, 31 Aug 2012 12:29:12 +0000
Subject: tcp: TCP Fast Open Server - support TFO listeners

This patch builds on top of the previous patch to add the support
for TFO listeners. This includes -

1. allocating, properly initializing, and managing the per listener
fastopen_queue structure when TFO is enabled

2. changes to the inet_csk_accept code to support TFO. E.g., the
request_sock can no longer be freed upon accept(), not until 3WHS
finishes

3. allowing a TCP_SYN_RECV socket to properly poll() and sendmsg()
if it's a TFO socket

4. properly closing a TFO listener, and a TFO socket before 3WHS
finishes

5. supporting TCP_FASTOPEN socket option

6. modifying tcp_check_req() to use to check a TFO socket as well
as request_sock

7. supporting TCP's TFO cookie option

8. adding a new SYN-ACK retransmit handler to use the timer directly
off the TFO socket rather than the listener socket. Note that TFO
server side will not retransmit anything other than SYN-ACK until
the 3WHS is completed.

The patch also contains an important function
"reqsk_fastopen_remove()" to manage the somewhat complex relation
between a listener, its request_sock, and the corresponding child
socket. See the comment above the function for the detail.

Signed-off-by: H.K. Jerry Chu <hkchu@google.com>
Cc: Yuchung Cheng <ycheng@google.com>
Cc: Neal Cardwell <ncardwell@google.com>
Cc: Eric Dumazet <edumazet@google.com>
Cc: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/request_sock.h      | 13 ------
 include/net/tcp.h               |  6 ++-
 net/core/request_sock.c         | 95 +++++++++++++++++++++++++++++++++++++++++
 net/ipv4/af_inet.c              | 28 +++++++++++-
 net/ipv4/inet_connection_sock.c | 57 ++++++++++++++++++++++---
 net/ipv4/syncookies.c           |  1 +
 net/ipv4/tcp.c                  | 49 ++++++++++++++++++---
 net/ipv4/tcp_ipv4.c             |  4 +-
 net/ipv4/tcp_minisocks.c        | 61 +++++++++++++++++++++-----
 net/ipv4/tcp_output.c           | 21 ++++++---
 net/ipv4/tcp_timer.c            | 39 ++++++++++++++++-
 net/ipv6/syncookies.c           |  1 +
 net/ipv6/tcp_ipv6.c             |  5 ++-
 13 files changed, 330 insertions(+), 50 deletions(-)

(limited to 'include')

diff --git a/include/net/request_sock.h b/include/net/request_sock.h
index c3cdd6c9f448..b01d8dd9ee7c 100644
--- a/include/net/request_sock.h
+++ b/include/net/request_sock.h
@@ -226,19 +226,6 @@ static inline struct request_sock *reqsk_queue_remove(struct request_sock_queue
 	return req;
 }
 
-static inline struct sock *reqsk_queue_get_child(struct request_sock_queue *queue,
-						 struct sock *parent)
-{
-	struct request_sock *req = reqsk_queue_remove(queue);
-	struct sock *child = req->sk;
-
-	WARN_ON(child == NULL);
-
-	sk_acceptq_removed(parent);
-	__reqsk_free(req);
-	return child;
-}
-
 static inline int reqsk_queue_removed(struct request_sock_queue *queue,
 				      struct request_sock *req)
 {
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 9f8821e3293a..1421b02a7905 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -424,7 +424,8 @@ extern enum tcp_tw_status tcp_timewait_state_process(struct inet_timewait_sock *
 						     const struct tcphdr *th);
 extern struct sock * tcp_check_req(struct sock *sk,struct sk_buff *skb,
 				   struct request_sock *req,
-				   struct request_sock **prev);
+				   struct request_sock **prev,
+				   bool fastopen);
 extern int tcp_child_process(struct sock *parent, struct sock *child,
 			     struct sk_buff *skb);
 extern bool tcp_use_frto(struct sock *sk);
@@ -478,7 +479,8 @@ extern int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr,
 extern int tcp_connect(struct sock *sk);
 extern struct sk_buff * tcp_make_synack(struct sock *sk, struct dst_entry *dst,
 					struct request_sock *req,
-					struct request_values *rvp);
+					struct request_values *rvp,
+					struct tcp_fastopen_cookie *foc);
 extern int tcp_disconnect(struct sock *sk, int flags);
 
 void tcp_connect_init(struct sock *sk);
diff --git a/net/core/request_sock.c b/net/core/request_sock.c
index 9b570a6a33c5..c31d9e8668c3 100644
--- a/net/core/request_sock.c
+++ b/net/core/request_sock.c
@@ -15,6 +15,7 @@
 #include <linux/random.h>
 #include <linux/slab.h>
 #include <linux/string.h>
+#include <linux/tcp.h>
 #include <linux/vmalloc.h>
 
 #include <net/request_sock.h>
@@ -130,3 +131,97 @@ void reqsk_queue_destroy(struct request_sock_queue *queue)
 		kfree(lopt);
 }
 
+/*
+ * This function is called to set a Fast Open socket's "fastopen_rsk" field
+ * to NULL when a TFO socket no longer needs to access the request_sock.
+ * This happens only after 3WHS has been either completed or aborted (e.g.,
+ * RST is received).
+ *
+ * Before TFO, a child socket is created only after 3WHS is completed,
+ * hence it never needs to access the request_sock. things get a lot more
+ * complex with TFO. A child socket, accepted or not, has to access its
+ * request_sock for 3WHS processing, e.g., to retransmit SYN-ACK pkts,
+ * until 3WHS is either completed or aborted. Afterwards the req will stay
+ * until either the child socket is accepted, or in the rare case when the
+ * listener is closed before the child is accepted.
+ *
+ * In short, a request socket is only freed after BOTH 3WHS has completed
+ * (or aborted) and the child socket has been accepted (or listener closed).
+ * When a child socket is accepted, its corresponding req->sk is set to
+ * NULL since it's no longer needed. More importantly, "req->sk == NULL"
+ * will be used by the code below to determine if a child socket has been
+ * accepted or not, and the check is protected by the fastopenq->lock
+ * described below.
+ *
+ * Note that fastopen_rsk is only accessed from the child socket's context
+ * with its socket lock held. But a request_sock (req) can be accessed by
+ * both its child socket through fastopen_rsk, and a listener socket through
+ * icsk_accept_queue.rskq_accept_head. To protect the access a simple spin
+ * lock per listener "icsk->icsk_accept_queue.fastopenq->lock" is created.
+ * only in the rare case when both the listener and the child locks are held,
+ * e.g., in inet_csk_listen_stop() do we not need to acquire the lock.
+ * The lock also protects other fields such as fastopenq->qlen, which is
+ * decremented by this function when fastopen_rsk is no longer needed.
+ *
+ * Note that another solution was to simply use the existing socket lock
+ * from the listener. But first socket lock is difficult to use. It is not
+ * a simple spin lock - one must consider sock_owned_by_user() and arrange
+ * to use sk_add_backlog() stuff. But what really makes it infeasible is the
+ * locking hierarchy violation. E.g., inet_csk_listen_stop() may try to
+ * acquire a child's lock while holding listener's socket lock. A corner
+ * case might also exist in tcp_v4_hnd_req() that will trigger this locking
+ * order.
+ *
+ * When a TFO req is created, it needs to sock_hold its listener to prevent
+ * the latter data structure from going away.
+ *
+ * This function also sets "treq->listener" to NULL and unreference listener
+ * socket. treq->listener is used by the listener so it is protected by the
+ * fastopenq->lock in this function.
+ */
+void reqsk_fastopen_remove(struct sock *sk, struct request_sock *req,
+			   bool reset)
+{
+	struct sock *lsk = tcp_rsk(req)->listener;
+	struct fastopen_queue *fastopenq =
+	    inet_csk(lsk)->icsk_accept_queue.fastopenq;
+
+	BUG_ON(!spin_is_locked(&sk->sk_lock.slock) && !sock_owned_by_user(sk));
+
+	tcp_sk(sk)->fastopen_rsk = NULL;
+	spin_lock_bh(&fastopenq->lock);
+	fastopenq->qlen--;
+	tcp_rsk(req)->listener = NULL;
+	if (req->sk)	/* the child socket hasn't been accepted yet */
+		goto out;
+
+	if (!reset || lsk->sk_state != TCP_LISTEN) {
+		/* If the listener has been closed don't bother with the
+		 * special RST handling below.
+		 */
+		spin_unlock_bh(&fastopenq->lock);
+		sock_put(lsk);
+		reqsk_free(req);
+		return;
+	}
+	/* Wait for 60secs before removing a req that has triggered RST.
+	 * This is a simple defense against TFO spoofing attack - by
+	 * counting the req against fastopen.max_qlen, and disabling
+	 * TFO when the qlen exceeds max_qlen.
+	 *
+	 * For more details see CoNext'11 "TCP Fast Open" paper.
+	 */
+	req->expires = jiffies + 60*HZ;
+	if (fastopenq->rskq_rst_head == NULL)
+		fastopenq->rskq_rst_head = req;
+	else
+		fastopenq->rskq_rst_tail->dl_next = req;
+
+	req->dl_next = NULL;
+	fastopenq->rskq_rst_tail = req;
+	fastopenq->qlen++;
+out:
+	spin_unlock_bh(&fastopenq->lock);
+	sock_put(lsk);
+	return;
+}
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 6681ccf5c3ee..4f70ef0b946d 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -149,6 +149,11 @@ void inet_sock_destruct(struct sock *sk)
 		pr_err("Attempt to release alive inet socket %p\n", sk);
 		return;
 	}
+	if (sk->sk_type == SOCK_STREAM) {
+		struct fastopen_queue *fastopenq =
+			inet_csk(sk)->icsk_accept_queue.fastopenq;
+		kfree(fastopenq);
+	}
 
 	WARN_ON(atomic_read(&sk->sk_rmem_alloc));
 	WARN_ON(atomic_read(&sk->sk_wmem_alloc));
@@ -212,6 +217,26 @@ int inet_listen(struct socket *sock, int backlog)
 	 * we can only allow the backlog to be adjusted.
 	 */
 	if (old_state != TCP_LISTEN) {
+		/* Check special setups for testing purpose to enable TFO w/o
+		 * requiring TCP_FASTOPEN sockopt.
+		 * Note that only TCP sockets (SOCK_STREAM) will reach here.
+		 * Also fastopenq may already been allocated because this
+		 * socket was in TCP_LISTEN state previously but was
+		 * shutdown() (rather than close()).
+		 */
+		if ((sysctl_tcp_fastopen & TFO_SERVER_ENABLE) != 0 &&
+		    inet_csk(sk)->icsk_accept_queue.fastopenq == NULL) {
+			if ((sysctl_tcp_fastopen & TFO_SERVER_WO_SOCKOPT1) != 0)
+				err = fastopen_init_queue(sk, backlog);
+			else if ((sysctl_tcp_fastopen &
+				  TFO_SERVER_WO_SOCKOPT2) != 0)
+				err = fastopen_init_queue(sk,
+				    ((uint)sysctl_tcp_fastopen) >> 16);
+			else
+				err = 0;
+			if (err)
+				goto out;
+		}
 		err = inet_csk_listen_start(sk, backlog);
 		if (err)
 			goto out;
@@ -701,7 +726,8 @@ int inet_accept(struct socket *sock, struct socket *newsock, int flags)
 
 	sock_rps_record_flow(sk2);
 	WARN_ON(!((1 << sk2->sk_state) &
-		  (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT | TCPF_CLOSE)));
+		  (TCPF_ESTABLISHED | TCPF_SYN_RECV |
+		  TCPF_CLOSE_WAIT | TCPF_CLOSE)));
 
 	sock_graft(sk2, newsock);
 
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 7f75f21d7b83..8464b79c493f 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -283,7 +283,9 @@ static int inet_csk_wait_for_connect(struct sock *sk, long timeo)
 struct sock *inet_csk_accept(struct sock *sk, int flags, int *err)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
+	struct request_sock_queue *queue = &icsk->icsk_accept_queue;
 	struct sock *newsk;
+	struct request_sock *req;
 	int error;
 
 	lock_sock(sk);
@@ -296,7 +298,7 @@ struct sock *inet_csk_accept(struct sock *sk, int flags, int *err)
 		goto out_err;
 
 	/* Find already established connection */
-	if (reqsk_queue_empty(&icsk->icsk_accept_queue)) {
+	if (reqsk_queue_empty(queue)) {
 		long timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
 
 		/* If this is a non blocking socket don't sleep */
@@ -308,14 +310,32 @@ struct sock *inet_csk_accept(struct sock *sk, int flags, int *err)
 		if (error)
 			goto out_err;
 	}
-
-	newsk = reqsk_queue_get_child(&icsk->icsk_accept_queue, sk);
-	WARN_ON(newsk->sk_state == TCP_SYN_RECV);
+	req = reqsk_queue_remove(queue);
+	newsk = req->sk;
+
+	sk_acceptq_removed(sk);
+	if (sk->sk_type == SOCK_STREAM && queue->fastopenq != NULL) {
+		spin_lock_bh(&queue->fastopenq->lock);
+		if (tcp_rsk(req)->listener) {
+			/* We are still waiting for the final ACK from 3WHS
+			 * so can't free req now. Instead, we set req->sk to
+			 * NULL to signify that the child socket is taken
+			 * so reqsk_fastopen_remove() will free the req
+			 * when 3WHS finishes (or is aborted).
+			 */
+			req->sk = NULL;
+			req = NULL;
+		}
+		spin_unlock_bh(&queue->fastopenq->lock);
+	}
 out:
 	release_sock(sk);
+	if (req)
+		__reqsk_free(req);
 	return newsk;
 out_err:
 	newsk = NULL;
+	req = NULL;
 	*err = error;
 	goto out;
 }
@@ -720,13 +740,14 @@ EXPORT_SYMBOL_GPL(inet_csk_listen_start);
 void inet_csk_listen_stop(struct sock *sk)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
+	struct request_sock_queue *queue = &icsk->icsk_accept_queue;
 	struct request_sock *acc_req;
 	struct request_sock *req;
 
 	inet_csk_delete_keepalive_timer(sk);
 
 	/* make all the listen_opt local to us */
-	acc_req = reqsk_queue_yank_acceptq(&icsk->icsk_accept_queue);
+	acc_req = reqsk_queue_yank_acceptq(queue);
 
 	/* Following specs, it would be better either to send FIN
 	 * (and enter FIN-WAIT-1, it is normal close)
@@ -736,7 +757,7 @@ void inet_csk_listen_stop(struct sock *sk)
 	 * To be honest, we are not able to make either
 	 * of the variants now.			--ANK
 	 */
-	reqsk_queue_destroy(&icsk->icsk_accept_queue);
+	reqsk_queue_destroy(queue);
 
 	while ((req = acc_req) != NULL) {
 		struct sock *child = req->sk;
@@ -754,6 +775,19 @@ void inet_csk_listen_stop(struct sock *sk)
 
 		percpu_counter_inc(sk->sk_prot->orphan_count);
 
+		if (sk->sk_type == SOCK_STREAM && tcp_rsk(req)->listener) {
+			BUG_ON(tcp_sk(child)->fastopen_rsk != req);
+			BUG_ON(sk != tcp_rsk(req)->listener);
+
+			/* Paranoid, to prevent race condition if
+			 * an inbound pkt destined for child is
+			 * blocked by sock lock in tcp_v4_rcv().
+			 * Also to satisfy an assertion in
+			 * tcp_v4_destroy_sock().
+			 */
+			tcp_sk(child)->fastopen_rsk = NULL;
+			sock_put(sk);
+		}
 		inet_csk_destroy_sock(child);
 
 		bh_unlock_sock(child);
@@ -763,6 +797,17 @@ void inet_csk_listen_stop(struct sock *sk)
 		sk_acceptq_removed(sk);
 		__reqsk_free(req);
 	}
+	if (queue->fastopenq != NULL) {
+		/* Free all the reqs queued in rskq_rst_head. */
+		spin_lock_bh(&queue->fastopenq->lock);
+		acc_req = queue->fastopenq->rskq_rst_head;
+		queue->fastopenq->rskq_rst_head = NULL;
+		spin_unlock_bh(&queue->fastopenq->lock);
+		while ((req = acc_req) != NULL) {
+			acc_req = req->dl_next;
+			__reqsk_free(req);
+		}
+	}
 	WARN_ON(sk->sk_ack_backlog);
 }
 EXPORT_SYMBOL_GPL(inet_csk_listen_stop);
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 650e1528e1e6..ba48e799b031 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -319,6 +319,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
 	ireq->tstamp_ok		= tcp_opt.saw_tstamp;
 	req->ts_recent		= tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0;
 	treq->snt_synack	= tcp_opt.saw_tstamp ? tcp_opt.rcv_tsecr : 0;
+	treq->listener		= NULL;
 
 	/* We throwed the options of the initial SYN away, so we hope
 	 * the ACK carries the same options again (see RFC1122 4.2.3.8)
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 2109ff4a1daf..df83d744e380 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -486,8 +486,9 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
 	if (sk->sk_shutdown & RCV_SHUTDOWN)
 		mask |= POLLIN | POLLRDNORM | POLLRDHUP;
 
-	/* Connected? */
-	if ((1 << sk->sk_state) & ~(TCPF_SYN_SENT | TCPF_SYN_RECV)) {
+	/* Connected or passive Fast Open socket? */
+	if (sk->sk_state != TCP_SYN_SENT &&
+	    (sk->sk_state != TCP_SYN_RECV || tp->fastopen_rsk != NULL)) {
 		int target = sock_rcvlowat(sk, 0, INT_MAX);
 
 		if (tp->urg_seq == tp->copied_seq &&
@@ -840,10 +841,15 @@ static ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffse
 	ssize_t copied;
 	long timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
 
-	/* Wait for a connection to finish. */
-	if ((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT))
+	/* Wait for a connection to finish. One exception is TCP Fast Open
+	 * (passive side) where data is allowed to be sent before a connection
+	 * is fully established.
+	 */
+	if (((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) &&
+	    !tcp_passive_fastopen(sk)) {
 		if ((err = sk_stream_wait_connect(sk, &timeo)) != 0)
 			goto out_err;
+	}
 
 	clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
 
@@ -1042,10 +1048,15 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 
 	timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
 
-	/* Wait for a connection to finish. */
-	if ((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT))
+	/* Wait for a connection to finish. One exception is TCP Fast Open
+	 * (passive side) where data is allowed to be sent before a connection
+	 * is fully established.
+	 */
+	if (((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) &&
+	    !tcp_passive_fastopen(sk)) {
 		if ((err = sk_stream_wait_connect(sk, &timeo)) != 0)
 			goto do_error;
+	}
 
 	if (unlikely(tp->repair)) {
 		if (tp->repair_queue == TCP_RECV_QUEUE) {
@@ -2144,6 +2155,10 @@ void tcp_close(struct sock *sk, long timeout)
 		 * they look as CLOSING or LAST_ACK for Linux)
 		 * Probably, I missed some more holelets.
 		 * 						--ANK
+		 * XXX (TFO) - To start off we don't support SYN+ACK+FIN
+		 * in a single packet! (May consider it later but will
+		 * probably need API support or TCP_CORK SYN-ACK until
+		 * data is written and socket is closed.)
 		 */
 		tcp_send_fin(sk);
 	}
@@ -2215,8 +2230,16 @@ adjudge_to_death:
 		}
 	}
 
-	if (sk->sk_state == TCP_CLOSE)
+	if (sk->sk_state == TCP_CLOSE) {
+		struct request_sock *req = tcp_sk(sk)->fastopen_rsk;
+		/* We could get here with a non-NULL req if the socket is
+		 * aborted (e.g., closed with unread data) before 3WHS
+		 * finishes.
+		 */
+		if (req != NULL)
+			reqsk_fastopen_remove(sk, req, false);
 		inet_csk_destroy_sock(sk);
+	}
 	/* Otherwise, socket is reprieved until protocol close. */
 
 out:
@@ -2688,6 +2711,14 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
 		else
 			icsk->icsk_user_timeout = msecs_to_jiffies(val);
 		break;
+
+	case TCP_FASTOPEN:
+		if (val >= 0 && ((1 << sk->sk_state) & (TCPF_CLOSE |
+		    TCPF_LISTEN)))
+			err = fastopen_init_queue(sk, val);
+		else
+			err = -EINVAL;
+		break;
 	default:
 		err = -ENOPROTOOPT;
 		break;
@@ -3501,11 +3532,15 @@ EXPORT_SYMBOL(tcp_cookie_generator);
 
 void tcp_done(struct sock *sk)
 {
+	struct request_sock *req = tcp_sk(sk)->fastopen_rsk;
+
 	if (sk->sk_state == TCP_SYN_SENT || sk->sk_state == TCP_SYN_RECV)
 		TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_ATTEMPTFAILS);
 
 	tcp_set_state(sk, TCP_CLOSE);
 	tcp_clear_xmit_timers(sk);
+	if (req != NULL)
+		reqsk_fastopen_remove(sk, req, false);
 
 	sk->sk_shutdown = SHUTDOWN_MASK;
 
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 36f02f954ac1..bb148dee1edd 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -839,7 +839,7 @@ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
 	if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL)
 		return -1;
 
-	skb = tcp_make_synack(sk, dst, req, rvp);
+	skb = tcp_make_synack(sk, dst, req, rvp, NULL);
 
 	if (skb) {
 		__tcp_v4_send_check(skb, ireq->loc_addr, ireq->rmt_addr);
@@ -1554,7 +1554,7 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
 	struct request_sock *req = inet_csk_search_req(sk, &prev, th->source,
 						       iph->saddr, iph->daddr);
 	if (req)
-		return tcp_check_req(sk, skb, req, prev);
+		return tcp_check_req(sk, skb, req, prev, false);
 
 	nsk = inet_lookup_established(sock_net(sk), &tcp_hashinfo, iph->saddr,
 			th->source, iph->daddr, th->dest, inet_iif(skb));
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 6ff7f10dce9d..e965319d610b 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -507,6 +507,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
 			newicsk->icsk_ack.last_seg_size = skb->len - newtp->tcp_header_len;
 		newtp->rx_opt.mss_clamp = req->mss;
 		TCP_ECN_openreq_child(newtp, req);
+		newtp->fastopen_rsk = NULL;
 
 		TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_PASSIVEOPENS);
 	}
@@ -515,13 +516,18 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
 EXPORT_SYMBOL(tcp_create_openreq_child);
 
 /*
- *	Process an incoming packet for SYN_RECV sockets represented
- *	as a request_sock.
+ * Process an incoming packet for SYN_RECV sockets represented as a
+ * request_sock. Normally sk is the listener socket but for TFO it
+ * points to the child socket.
+ *
+ * XXX (TFO) - The current impl contains a special check for ack
+ * validation and inside tcp_v4_reqsk_send_ack(). Can we do better?
  */
 
 struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
 			   struct request_sock *req,
-			   struct request_sock **prev)
+			   struct request_sock **prev,
+			   bool fastopen)
 {
 	struct tcp_options_received tmp_opt;
 	const u8 *hash_location;
@@ -530,6 +536,8 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
 	__be32 flg = tcp_flag_word(th) & (TCP_FLAG_RST|TCP_FLAG_SYN|TCP_FLAG_ACK);
 	bool paws_reject = false;
 
+	BUG_ON(fastopen == (sk->sk_state == TCP_LISTEN));
+
 	tmp_opt.saw_tstamp = 0;
 	if (th->doff > (sizeof(struct tcphdr)>>2)) {
 		tcp_parse_options(skb, &tmp_opt, &hash_location, 0, NULL);
@@ -565,6 +573,9 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
 		 *
 		 * Enforce "SYN-ACK" according to figure 8, figure 6
 		 * of RFC793, fixed by RFC1122.
+		 *
+		 * Note that even if there is new data in the SYN packet
+		 * they will be thrown away too.
 		 */
 		req->rsk_ops->rtx_syn_ack(sk, req, NULL);
 		return NULL;
@@ -622,9 +633,12 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
 	 *                  sent (the segment carries an unacceptable ACK) ...
 	 *                  a reset is sent."
 	 *
-	 * Invalid ACK: reset will be sent by listening socket
+	 * Invalid ACK: reset will be sent by listening socket.
+	 * Note that the ACK validity check for a Fast Open socket is done
+	 * elsewhere and is checked directly against the child socket rather
+	 * than req because user data may have been sent out.
 	 */
-	if ((flg & TCP_FLAG_ACK) &&
+	if ((flg & TCP_FLAG_ACK) && !fastopen &&
 	    (TCP_SKB_CB(skb)->ack_seq !=
 	     tcp_rsk(req)->snt_isn + 1 + tcp_s_data_size(tcp_sk(sk))))
 		return sk;
@@ -637,7 +651,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
 	/* RFC793: "first check sequence number". */
 
 	if (paws_reject || !tcp_in_window(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq,
-					  tcp_rsk(req)->rcv_isn + 1, tcp_rsk(req)->rcv_isn + 1 + req->rcv_wnd)) {
+					  tcp_rsk(req)->rcv_nxt, tcp_rsk(req)->rcv_nxt + req->rcv_wnd)) {
 		/* Out of window: send ACK and drop. */
 		if (!(flg & TCP_FLAG_RST))
 			req->rsk_ops->send_ack(sk, skb, req);
@@ -648,7 +662,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
 
 	/* In sequence, PAWS is OK. */
 
-	if (tmp_opt.saw_tstamp && !after(TCP_SKB_CB(skb)->seq, tcp_rsk(req)->rcv_isn + 1))
+	if (tmp_opt.saw_tstamp && !after(TCP_SKB_CB(skb)->seq, tcp_rsk(req)->rcv_nxt))
 		req->ts_recent = tmp_opt.rcv_tsval;
 
 	if (TCP_SKB_CB(skb)->seq == tcp_rsk(req)->rcv_isn) {
@@ -667,10 +681,19 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
 
 	/* ACK sequence verified above, just make sure ACK is
 	 * set.  If ACK not set, just silently drop the packet.
+	 *
+	 * XXX (TFO) - if we ever allow "data after SYN", the
+	 * following check needs to be removed.
 	 */
 	if (!(flg & TCP_FLAG_ACK))
 		return NULL;
 
+	/* For Fast Open no more processing is needed (sk is the
+	 * child socket).
+	 */
+	if (fastopen)
+		return sk;
+
 	/* While TCP_DEFER_ACCEPT is active, drop bare ACK. */
 	if (req->retrans < inet_csk(sk)->icsk_accept_queue.rskq_defer_accept &&
 	    TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) {
@@ -706,11 +729,21 @@ listen_overflow:
 	}
 
 embryonic_reset:
-	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_EMBRYONICRSTS);
-	if (!(flg & TCP_FLAG_RST))
+	if (!(flg & TCP_FLAG_RST)) {
+		/* Received a bad SYN pkt - for TFO We try not to reset
+		 * the local connection unless it's really necessary to
+		 * avoid becoming vulnerable to outside attack aiming at
+		 * resetting legit local connections.
+		 */
 		req->rsk_ops->send_reset(sk, skb);
-
-	inet_csk_reqsk_queue_drop(sk, req, prev);
+	} else if (fastopen) { /* received a valid RST pkt */
+		reqsk_fastopen_remove(sk, req, true);
+		tcp_reset(sk);
+	}
+	if (!fastopen) {
+		inet_csk_reqsk_queue_drop(sk, req, prev);
+		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_EMBRYONICRSTS);
+	}
 	return NULL;
 }
 EXPORT_SYMBOL(tcp_check_req);
@@ -719,6 +752,12 @@ EXPORT_SYMBOL(tcp_check_req);
  * Queue segment on the new socket if the new socket is active,
  * otherwise we just shortcircuit this and continue with
  * the new socket.
+ *
+ * For the vast majority of cases child->sk_state will be TCP_SYN_RECV
+ * when entering. But other states are possible due to a race condition
+ * where after __inet_lookup_established() fails but before the listener
+ * locked is obtained, other packets cause the same connection to
+ * be created.
  */
 
 int tcp_child_process(struct sock *parent, struct sock *child,
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index d04632673a9e..9383b51f3efc 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -702,7 +702,8 @@ static unsigned int tcp_synack_options(struct sock *sk,
 				   unsigned int mss, struct sk_buff *skb,
 				   struct tcp_out_options *opts,
 				   struct tcp_md5sig_key **md5,
-				   struct tcp_extend_values *xvp)
+				   struct tcp_extend_values *xvp,
+				   struct tcp_fastopen_cookie *foc)
 {
 	struct inet_request_sock *ireq = inet_rsk(req);
 	unsigned int remaining = MAX_TCP_OPTION_SPACE;
@@ -747,7 +748,15 @@ static unsigned int tcp_synack_options(struct sock *sk,
 		if (unlikely(!ireq->tstamp_ok))
 			remaining -= TCPOLEN_SACKPERM_ALIGNED;
 	}
-
+	if (foc != NULL) {
+		u32 need = TCPOLEN_EXP_FASTOPEN_BASE + foc->len;
+		need = (need + 3) & ~3U;  /* Align to 32 bits */
+		if (remaining >= need) {
+			opts->options |= OPTION_FAST_OPEN_COOKIE;
+			opts->fastopen_cookie = foc;
+			remaining -= need;
+		}
+	}
 	/* Similar rationale to tcp_syn_options() applies here, too.
 	 * If the <SYN> options fit, the same options should fit now!
 	 */
@@ -2658,7 +2667,8 @@ int tcp_send_synack(struct sock *sk)
  */
 struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
 				struct request_sock *req,
-				struct request_values *rvp)
+				struct request_values *rvp,
+				struct tcp_fastopen_cookie *foc)
 {
 	struct tcp_out_options opts;
 	struct tcp_extend_values *xvp = tcp_xv(rvp);
@@ -2718,7 +2728,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
 #endif
 	TCP_SKB_CB(skb)->when = tcp_time_stamp;
 	tcp_header_size = tcp_synack_options(sk, req, mss,
-					     skb, &opts, &md5, xvp)
+					     skb, &opts, &md5, xvp, foc)
 			+ sizeof(*th);
 
 	skb_push(skb, tcp_header_size);
@@ -2772,7 +2782,8 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
 	}
 
 	th->seq = htonl(TCP_SKB_CB(skb)->seq);
-	th->ack_seq = htonl(tcp_rsk(req)->rcv_isn + 1);
+	/* XXX data is queued and acked as is. No buffer/window check */
+	th->ack_seq = htonl(tcp_rsk(req)->rcv_nxt);
 
 	/* RFC1323: The window in SYN & SYN/ACK segments is never scaled. */
 	th->window = htons(min(req->rcv_wnd, 65535U));
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index b774a03bd1dc..fc04711e80c8 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -304,6 +304,35 @@ static void tcp_probe_timer(struct sock *sk)
 	}
 }
 
+/*
+ *	Timer for Fast Open socket to retransmit SYNACK. Note that the
+ *	sk here is the child socket, not the parent (listener) socket.
+ */
+static void tcp_fastopen_synack_timer(struct sock *sk)
+{
+	struct inet_connection_sock *icsk = inet_csk(sk);
+	int max_retries = icsk->icsk_syn_retries ? :
+	    sysctl_tcp_synack_retries + 1; /* add one more retry for fastopen */
+	struct request_sock *req;
+
+	req = tcp_sk(sk)->fastopen_rsk;
+	req->rsk_ops->syn_ack_timeout(sk, req);
+
+	if (req->retrans >= max_retries) {
+		tcp_write_err(sk);
+		return;
+	}
+	/* XXX (TFO) - Unlike regular SYN-ACK retransmit, we ignore error
+	 * returned from rtx_syn_ack() to make it more persistent like
+	 * regular retransmit because if the child socket has been accepted
+	 * it's not good to give up too easily.
+	 */
+	req->rsk_ops->rtx_syn_ack(sk, req, NULL);
+	req->retrans++;
+	inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
+			  TCP_TIMEOUT_INIT << req->retrans, TCP_RTO_MAX);
+}
+
 /*
  *	The TCP retransmit timer.
  */
@@ -317,7 +346,15 @@ void tcp_retransmit_timer(struct sock *sk)
 		tcp_resume_early_retransmit(sk);
 		return;
 	}
-
+	if (tp->fastopen_rsk) {
+		BUG_ON(sk->sk_state != TCP_SYN_RECV &&
+		    sk->sk_state != TCP_FIN_WAIT1);
+		tcp_fastopen_synack_timer(sk);
+		/* Before we receive ACK to our SYN-ACK don't retransmit
+		 * anything else (e.g., data or FIN segments).
+		 */
+		return;
+	}
 	if (!tp->packets_out)
 		goto out;
 
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index bb46061c813a..182ab9a85d6c 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -190,6 +190,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
 	ireq = inet_rsk(req);
 	ireq6 = inet6_rsk(req);
 	treq = tcp_rsk(req);
+	treq->listener = NULL;
 
 	if (security_inet_conn_request(sk, skb, req))
 		goto out_free;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index f99b81d53cca..09078b9bc6f6 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -475,7 +475,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst,
 	if (!dst && (dst = inet6_csk_route_req(sk, fl6, req)) == NULL)
 		goto done;
 
-	skb = tcp_make_synack(sk, dst, req, rvp);
+	skb = tcp_make_synack(sk, dst, req, rvp, NULL);
 
 	if (skb) {
 		__tcp_v6_send_check(skb, &treq->loc_addr, &treq->rmt_addr);
@@ -987,7 +987,7 @@ static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
 				   &ipv6_hdr(skb)->saddr,
 				   &ipv6_hdr(skb)->daddr, inet6_iif(skb));
 	if (req)
-		return tcp_check_req(sk, skb, req, prev);
+		return tcp_check_req(sk, skb, req, prev, false);
 
 	nsk = __inet6_lookup_established(sock_net(sk), &tcp_hashinfo,
 			&ipv6_hdr(skb)->saddr, th->source,
@@ -1179,6 +1179,7 @@ have_isn:
 	    want_cookie)
 		goto drop_and_free;
 
+	tcp_rsk(req)->listener = NULL;
 	inet6_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
 	return 0;
 
-- 
cgit v1.2.3


From 84b5ee939eba0115739c19c0e01ea903b029c9da Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Tue, 28 Aug 2012 00:53:15 +0000
Subject: netfilter: nf_conntrack: add nf_ct_timeout_lookup

This patch adds the new nf_ct_timeout_lookup function to encapsulate
the timeout policy attachment that is called in the nf_conntrack_in
path.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack_timeout.h | 20 ++++++++++++++++++++
 net/netfilter/nf_conntrack_core.c            |  7 +------
 2 files changed, 21 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_conntrack_timeout.h b/include/net/netfilter/nf_conntrack_timeout.h
index 34ec89f8dbf9..e41e472d08f2 100644
--- a/include/net/netfilter/nf_conntrack_timeout.h
+++ b/include/net/netfilter/nf_conntrack_timeout.h
@@ -55,6 +55,26 @@ struct nf_conn_timeout *nf_ct_timeout_ext_add(struct nf_conn *ct,
 #endif
 };
 
+static inline unsigned int *
+nf_ct_timeout_lookup(struct net *net, struct nf_conn *ct,
+		     struct nf_conntrack_l4proto *l4proto)
+{
+#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
+	struct nf_conn_timeout *timeout_ext;
+	unsigned int *timeouts;
+
+	timeout_ext = nf_ct_timeout_find(ct);
+	if (timeout_ext)
+		timeouts = NF_CT_TIMEOUT_EXT_DATA(timeout_ext);
+	else
+		timeouts = l4proto->get_timeouts(net);
+
+	return timeouts;
+#else
+	return l4proto->get_timeouts(net);
+#endif
+}
+
 #ifdef CONFIG_NF_CONNTRACK_TIMEOUT
 extern int nf_conntrack_timeout_init(struct net *net);
 extern void nf_conntrack_timeout_fini(struct net *net);
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index f83e79defed9..c9bb994ae9ba 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -930,7 +930,6 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
 	enum ip_conntrack_info ctinfo;
 	struct nf_conntrack_l3proto *l3proto;
 	struct nf_conntrack_l4proto *l4proto;
-	struct nf_conn_timeout *timeout_ext;
 	unsigned int *timeouts;
 	unsigned int dataoff;
 	u_int8_t protonum;
@@ -997,11 +996,7 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
 	NF_CT_ASSERT(skb->nfct);
 
 	/* Decide what timeout policy we want to apply to this flow. */
-	timeout_ext = nf_ct_timeout_find(ct);
-	if (timeout_ext)
-		timeouts = NF_CT_TIMEOUT_EXT_DATA(timeout_ext);
-	else
-		timeouts = l4proto->get_timeouts(net);
+	timeouts = nf_ct_timeout_lookup(net, ct, l4proto);
 
 	ret = l4proto->packet(ct, skb, dataoff, ctinfo, pf, hooknum, timeouts);
 	if (ret <= 0) {
-- 
cgit v1.2.3


From 684bad1107571d35610a674c61b3544efb5a5b13 Mon Sep 17 00:00:00 2001
From: Yuchung Cheng <ycheng@google.com>
Date: Sun, 2 Sep 2012 17:38:04 +0000
Subject: tcp: use PRR to reduce cwin in CWR state

Use proportional rate reduction (PRR) algorithm to reduce cwnd in CWR state,
in addition to Recovery state. Retire the current rate-halving in CWR.
When losses are detected via ACKs in CWR state, the sender enters Recovery
state but the cwnd reduction continues and does not restart.

Rename and refactor cwnd reduction functions since both CWR and Recovery
use the same algorithm:
tcp_init_cwnd_reduction() is new and initiates reduction state variables.
tcp_cwnd_reduction() is previously tcp_update_cwnd_in_recovery().
tcp_ends_cwnd_reduction() is previously  tcp_complete_cwr().

The rate halving functions and logic such as tcp_cwnd_down(), tcp_min_cwnd(),
and the cwnd moderation inside tcp_enter_cwr() are removed. The unused
parameter, flag, in tcp_cwnd_reduction() is also removed.

Signed-off-by: Yuchung Cheng <ycheng@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h     |  10 ++++-
 net/ipv4/tcp_input.c  | 119 +++++++++++++++++---------------------------------
 net/ipv4/tcp_output.c |   6 +--
 3 files changed, 52 insertions(+), 83 deletions(-)

(limited to 'include')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 1421b02a7905..a8cb00c0c6d9 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -913,15 +913,21 @@ static inline bool tcp_in_initial_slowstart(const struct tcp_sock *tp)
 	return tp->snd_ssthresh >= TCP_INFINITE_SSTHRESH;
 }
 
+static inline bool tcp_in_cwnd_reduction(const struct sock *sk)
+{
+	return (TCPF_CA_CWR | TCPF_CA_Recovery) &
+	       (1 << inet_csk(sk)->icsk_ca_state);
+}
+
 /* If cwnd > ssthresh, we may raise ssthresh to be half-way to cwnd.
- * The exception is rate halving phase, when cwnd is decreasing towards
+ * The exception is cwnd reduction phase, when cwnd is decreasing towards
  * ssthresh.
  */
 static inline __u32 tcp_current_ssthresh(const struct sock *sk)
 {
 	const struct tcp_sock *tp = tcp_sk(sk);
 
-	if ((1 << inet_csk(sk)->icsk_ca_state) & (TCPF_CA_CWR | TCPF_CA_Recovery))
+	if (tcp_in_cwnd_reduction(sk))
 		return tp->snd_ssthresh;
 	else
 		return max(tp->snd_ssthresh,
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 38589e464e63..e2bec815ff23 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2470,35 +2470,6 @@ static inline void tcp_moderate_cwnd(struct tcp_sock *tp)
 	tp->snd_cwnd_stamp = tcp_time_stamp;
 }
 
-/* Lower bound on congestion window is slow start threshold
- * unless congestion avoidance choice decides to overide it.
- */
-static inline u32 tcp_cwnd_min(const struct sock *sk)
-{
-	const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops;
-
-	return ca_ops->min_cwnd ? ca_ops->min_cwnd(sk) : tcp_sk(sk)->snd_ssthresh;
-}
-
-/* Decrease cwnd each second ack. */
-static void tcp_cwnd_down(struct sock *sk, int flag)
-{
-	struct tcp_sock *tp = tcp_sk(sk);
-	int decr = tp->snd_cwnd_cnt + 1;
-
-	if ((flag & (FLAG_ANY_PROGRESS | FLAG_DSACKING_ACK)) ||
-	    (tcp_is_reno(tp) && !(flag & FLAG_NOT_DUP))) {
-		tp->snd_cwnd_cnt = decr & 1;
-		decr >>= 1;
-
-		if (decr && tp->snd_cwnd > tcp_cwnd_min(sk))
-			tp->snd_cwnd -= decr;
-
-		tp->snd_cwnd = min(tp->snd_cwnd, tcp_packets_in_flight(tp) + 1);
-		tp->snd_cwnd_stamp = tcp_time_stamp;
-	}
-}
-
 /* Nothing was retransmitted or returned timestamp is less
  * than timestamp of the first retransmission.
  */
@@ -2700,9 +2671,8 @@ static bool tcp_try_undo_loss(struct sock *sk)
 	return false;
 }
 
-/* This function implements the PRR algorithm, specifcally the PRR-SSRB
- * (proportional rate reduction with slow start reduction bound) as described in
- * http://www.ietf.org/id/draft-mathis-tcpm-proportional-rate-reduction-01.txt.
+/* The cwnd reduction in CWR and Recovery use the PRR algorithm
+ * https://datatracker.ietf.org/doc/draft-ietf-tcpm-proportional-rate-reduction/
  * It computes the number of packets to send (sndcnt) based on packets newly
  * delivered:
  *   1) If the packets in flight is larger than ssthresh, PRR spreads the
@@ -2711,13 +2681,29 @@ static bool tcp_try_undo_loss(struct sock *sk)
  *	losses and/or application stalls), do not perform any further cwnd
  *	reductions, but instead slow start up to ssthresh.
  */
-static void tcp_update_cwnd_in_recovery(struct sock *sk, int newly_acked_sacked,
-					int fast_rexmit, int flag)
+static void tcp_init_cwnd_reduction(struct sock *sk, const bool set_ssthresh)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+
+	tp->high_seq = tp->snd_nxt;
+	tp->bytes_acked = 0;
+	tp->snd_cwnd_cnt = 0;
+	tp->prior_cwnd = tp->snd_cwnd;
+	tp->prr_delivered = 0;
+	tp->prr_out = 0;
+	if (set_ssthresh)
+		tp->snd_ssthresh = inet_csk(sk)->icsk_ca_ops->ssthresh(sk);
+	TCP_ECN_queue_cwr(tp);
+}
+
+static void tcp_cwnd_reduction(struct sock *sk, int newly_acked_sacked,
+			       int fast_rexmit)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	int sndcnt = 0;
 	int delta = tp->snd_ssthresh - tcp_packets_in_flight(tp);
 
+	tp->prr_delivered += newly_acked_sacked;
 	if (tcp_packets_in_flight(tp) > tp->snd_ssthresh) {
 		u64 dividend = (u64)tp->snd_ssthresh * tp->prr_delivered +
 			       tp->prior_cwnd - 1;
@@ -2732,43 +2718,29 @@ static void tcp_update_cwnd_in_recovery(struct sock *sk, int newly_acked_sacked,
 	tp->snd_cwnd = tcp_packets_in_flight(tp) + sndcnt;
 }
 
-static inline void tcp_complete_cwr(struct sock *sk)
+static inline void tcp_end_cwnd_reduction(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
-	/* Do not moderate cwnd if it's already undone in cwr or recovery. */
-	if (tp->undo_marker) {
-		if (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR) {
-			tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh);
-			tp->snd_cwnd_stamp = tcp_time_stamp;
-		} else if (tp->snd_ssthresh < TCP_INFINITE_SSTHRESH) {
-			/* PRR algorithm. */
-			tp->snd_cwnd = tp->snd_ssthresh;
-			tp->snd_cwnd_stamp = tcp_time_stamp;
-		}
+	/* Reset cwnd to ssthresh in CWR or Recovery (unless it's undone) */
+	if (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR ||
+	    (tp->undo_marker && tp->snd_ssthresh < TCP_INFINITE_SSTHRESH)) {
+		tp->snd_cwnd = tp->snd_ssthresh;
+		tp->snd_cwnd_stamp = tcp_time_stamp;
 	}
 	tcp_ca_event(sk, CA_EVENT_COMPLETE_CWR);
 }
 
-/* Set slow start threshold and cwnd not falling to slow start */
+/* Enter CWR state. Disable cwnd undo since congestion is proven with ECN */
 void tcp_enter_cwr(struct sock *sk, const int set_ssthresh)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	const struct inet_connection_sock *icsk = inet_csk(sk);
 
 	tp->prior_ssthresh = 0;
 	tp->bytes_acked = 0;
-	if (icsk->icsk_ca_state < TCP_CA_CWR) {
+	if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) {
 		tp->undo_marker = 0;
-		if (set_ssthresh)
-			tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
-		tp->snd_cwnd = min(tp->snd_cwnd,
-				   tcp_packets_in_flight(tp) + 1U);
-		tp->snd_cwnd_cnt = 0;
-		tp->high_seq = tp->snd_nxt;
-		tp->snd_cwnd_stamp = tcp_time_stamp;
-		TCP_ECN_queue_cwr(tp);
-
+		tcp_init_cwnd_reduction(sk, set_ssthresh);
 		tcp_set_ca_state(sk, TCP_CA_CWR);
 	}
 }
@@ -2787,7 +2759,7 @@ static void tcp_try_keep_open(struct sock *sk)
 	}
 }
 
-static void tcp_try_to_open(struct sock *sk, int flag)
+static void tcp_try_to_open(struct sock *sk, int flag, int newly_acked_sacked)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
@@ -2804,7 +2776,7 @@ static void tcp_try_to_open(struct sock *sk, int flag)
 		if (inet_csk(sk)->icsk_ca_state != TCP_CA_Open)
 			tcp_moderate_cwnd(tp);
 	} else {
-		tcp_cwnd_down(sk, flag);
+		tcp_cwnd_reduction(sk, newly_acked_sacked, 0);
 	}
 }
 
@@ -2898,7 +2870,6 @@ static void tcp_enter_recovery(struct sock *sk, bool ece_ack)
 
 	NET_INC_STATS_BH(sock_net(sk), mib_idx);
 
-	tp->high_seq = tp->snd_nxt;
 	tp->prior_ssthresh = 0;
 	tp->undo_marker = tp->snd_una;
 	tp->undo_retrans = tp->retrans_out;
@@ -2906,15 +2877,8 @@ static void tcp_enter_recovery(struct sock *sk, bool ece_ack)
 	if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) {
 		if (!ece_ack)
 			tp->prior_ssthresh = tcp_current_ssthresh(sk);
-		tp->snd_ssthresh = inet_csk(sk)->icsk_ca_ops->ssthresh(sk);
-		TCP_ECN_queue_cwr(tp);
+		tcp_init_cwnd_reduction(sk, true);
 	}
-
-	tp->bytes_acked = 0;
-	tp->snd_cwnd_cnt = 0;
-	tp->prior_cwnd = tp->snd_cwnd;
-	tp->prr_delivered = 0;
-	tp->prr_out = 0;
 	tcp_set_ca_state(sk, TCP_CA_Recovery);
 }
 
@@ -2974,7 +2938,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
 			/* CWR is to be held something *above* high_seq
 			 * is ACKed for CWR bit to reach receiver. */
 			if (tp->snd_una != tp->high_seq) {
-				tcp_complete_cwr(sk);
+				tcp_end_cwnd_reduction(sk);
 				tcp_set_ca_state(sk, TCP_CA_Open);
 			}
 			break;
@@ -2984,7 +2948,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
 				tcp_reset_reno_sack(tp);
 			if (tcp_try_undo_recovery(sk))
 				return;
-			tcp_complete_cwr(sk);
+			tcp_end_cwnd_reduction(sk);
 			break;
 		}
 	}
@@ -3025,7 +2989,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
 			tcp_try_undo_dsack(sk);
 
 		if (!tcp_time_to_recover(sk, flag)) {
-			tcp_try_to_open(sk, flag);
+			tcp_try_to_open(sk, flag, newly_acked_sacked);
 			return;
 		}
 
@@ -3047,8 +3011,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
 
 	if (do_lost || (tcp_is_fack(tp) && tcp_head_timedout(sk)))
 		tcp_update_scoreboard(sk, fast_rexmit);
-	tp->prr_delivered += newly_acked_sacked;
-	tcp_update_cwnd_in_recovery(sk, newly_acked_sacked, fast_rexmit, flag);
+	tcp_cwnd_reduction(sk, newly_acked_sacked, fast_rexmit);
 	tcp_xmit_retransmit_queue(sk);
 }
 
@@ -3394,7 +3357,7 @@ static inline bool tcp_may_raise_cwnd(const struct sock *sk, const int flag)
 {
 	const struct tcp_sock *tp = tcp_sk(sk);
 	return (!(flag & FLAG_ECE) || tp->snd_cwnd < tp->snd_ssthresh) &&
-		!((1 << inet_csk(sk)->icsk_ca_state) & (TCPF_CA_Recovery | TCPF_CA_CWR));
+		!tcp_in_cwnd_reduction(sk);
 }
 
 /* Check that window update is acceptable.
@@ -3462,9 +3425,9 @@ static void tcp_conservative_spur_to_response(struct tcp_sock *tp)
 }
 
 /* A conservative spurious RTO response algorithm: reduce cwnd using
- * rate halving and continue in congestion avoidance.
+ * PRR and continue in congestion avoidance.
  */
-static void tcp_ratehalving_spur_to_response(struct sock *sk)
+static void tcp_cwr_spur_to_response(struct sock *sk)
 {
 	tcp_enter_cwr(sk, 0);
 }
@@ -3472,7 +3435,7 @@ static void tcp_ratehalving_spur_to_response(struct sock *sk)
 static void tcp_undo_spur_to_response(struct sock *sk, int flag)
 {
 	if (flag & FLAG_ECE)
-		tcp_ratehalving_spur_to_response(sk);
+		tcp_cwr_spur_to_response(sk);
 	else
 		tcp_undo_cwr(sk, true);
 }
@@ -3579,7 +3542,7 @@ static bool tcp_process_frto(struct sock *sk, int flag)
 			tcp_conservative_spur_to_response(tp);
 			break;
 		default:
-			tcp_ratehalving_spur_to_response(sk);
+			tcp_cwr_spur_to_response(sk);
 			break;
 		}
 		tp->frto_counter = 0;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 9383b51f3efc..cfe6ffe1c177 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2037,10 +2037,10 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
 		if (push_one)
 			break;
 	}
-	if (inet_csk(sk)->icsk_ca_state == TCP_CA_Recovery)
-		tp->prr_out += sent_pkts;
 
 	if (likely(sent_pkts)) {
+		if (tcp_in_cwnd_reduction(sk))
+			tp->prr_out += sent_pkts;
 		tcp_cwnd_validate(sk);
 		return false;
 	}
@@ -2542,7 +2542,7 @@ begin_fwd:
 		}
 		NET_INC_STATS_BH(sock_net(sk), mib_idx);
 
-		if (inet_csk(sk)->icsk_ca_state == TCP_CA_Recovery)
+		if (tcp_in_cwnd_reduction(sk))
 			tp->prr_out += tcp_skb_pcount(skb);
 
 		if (skb == tcp_write_queue_head(sk))
-- 
cgit v1.2.3


From c9a0a3025262c6395ac1464999cade56cddac3d6 Mon Sep 17 00:00:00 2001
From: "Robert P. J. Day" <rpjday@crashcourse.ca>
Date: Sun, 26 Aug 2012 14:21:47 -0400
Subject: cfg80211: add kerneldoc entry for "vht_cap"

Signed-off-by: Robert P. J. Day <rpjday@crashcourse.ca>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 4c518f1f1aca..60597cf7b83a 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -243,6 +243,7 @@ struct ieee80211_sta_vht_cap {
  *	rates" IE, i.e. CCK rates first, then OFDM.
  * @n_bitrates: Number of bitrates in @bitrates
  * @ht_cap: HT capabilities in this band
+ * @vht_cap: VHT capabilities in this band
  */
 struct ieee80211_supported_band {
 	struct ieee80211_channel *channels;
-- 
cgit v1.2.3


From d23ff701643a4a725e2c7a8ba2d567d39daa29ea Mon Sep 17 00:00:00 2001
From: Julian Anastasov <ja@ssi.bg>
Date: Tue, 4 Sep 2012 11:03:15 +0000
Subject: tcp: add generic netlink support for tcp_metrics

Add support for genl "tcp_metrics". No locking
is changed, only that now we can unlink and delete
entries after grace period. We implement get/del for
single entry and dump to support show/flush filtering
in user space. Del without address attribute causes
flush for all addresses, sadly under genl_mutex.

v2:
- remove rcu_assign_pointer as suggested by Eric Dumazet,
it is not needed because there are no other writes under lock
- move the flushing code in tcp_metrics_flush_all

v3:
- remove synchronize_rcu on flush as suggested by Eric Dumazet

Signed-off-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/Kbuild        |   1 +
 include/linux/tcp_metrics.h |  54 +++++++
 net/ipv4/tcp_metrics.c      | 354 ++++++++++++++++++++++++++++++++++++++++++--
 3 files changed, 396 insertions(+), 13 deletions(-)
 create mode 100644 include/linux/tcp_metrics.h

(limited to 'include')

diff --git a/include/linux/Kbuild b/include/linux/Kbuild
index 1f2c1c787f17..90da0af28352 100644
--- a/include/linux/Kbuild
+++ b/include/linux/Kbuild
@@ -363,6 +363,7 @@ header-y += sysctl.h
 header-y += sysinfo.h
 header-y += taskstats.h
 header-y += tcp.h
+header-y += tcp_metrics.h
 header-y += telephony.h
 header-y += termios.h
 header-y += time.h
diff --git a/include/linux/tcp_metrics.h b/include/linux/tcp_metrics.h
new file mode 100644
index 000000000000..cb5157b55f32
--- /dev/null
+++ b/include/linux/tcp_metrics.h
@@ -0,0 +1,54 @@
+/* tcp_metrics.h - TCP Metrics Interface */
+
+#ifndef _LINUX_TCP_METRICS_H
+#define _LINUX_TCP_METRICS_H
+
+#include <linux/types.h>
+
+/* NETLINK_GENERIC related info
+ */
+#define TCP_METRICS_GENL_NAME		"tcp_metrics"
+#define TCP_METRICS_GENL_VERSION	0x1
+
+enum tcp_metric_index {
+	TCP_METRIC_RTT,
+	TCP_METRIC_RTTVAR,
+	TCP_METRIC_SSTHRESH,
+	TCP_METRIC_CWND,
+	TCP_METRIC_REORDERING,
+
+	/* Always last.  */
+	__TCP_METRIC_MAX,
+};
+
+#define TCP_METRIC_MAX	(__TCP_METRIC_MAX - 1)
+
+enum {
+	TCP_METRICS_ATTR_UNSPEC,
+	TCP_METRICS_ATTR_ADDR_IPV4,		/* u32 */
+	TCP_METRICS_ATTR_ADDR_IPV6,		/* binary */
+	TCP_METRICS_ATTR_AGE,			/* msecs */
+	TCP_METRICS_ATTR_TW_TSVAL,		/* u32, raw, rcv tsval */
+	TCP_METRICS_ATTR_TW_TS_STAMP,		/* s32, sec age */
+	TCP_METRICS_ATTR_VALS,			/* nested +1, u32 */
+	TCP_METRICS_ATTR_FOPEN_MSS,		/* u16 */
+	TCP_METRICS_ATTR_FOPEN_SYN_DROPS,	/* u16, count of drops */
+	TCP_METRICS_ATTR_FOPEN_SYN_DROP_TS,	/* msecs age */
+	TCP_METRICS_ATTR_FOPEN_COOKIE,		/* binary */
+
+	__TCP_METRICS_ATTR_MAX,
+};
+
+#define TCP_METRICS_ATTR_MAX	(__TCP_METRICS_ATTR_MAX - 1)
+
+enum {
+	TCP_METRICS_CMD_UNSPEC,
+	TCP_METRICS_CMD_GET,
+	TCP_METRICS_CMD_DEL,
+
+	__TCP_METRICS_CMD_MAX,
+};
+
+#define TCP_METRICS_CMD_MAX	(__TCP_METRICS_CMD_MAX - 1)
+
+#endif /* _LINUX_TCP_METRICS_H */
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index 0abe67bb4d3a..988edb63ee73 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -8,6 +8,7 @@
 #include <linux/init.h>
 #include <linux/tcp.h>
 #include <linux/hash.h>
+#include <linux/tcp_metrics.h>
 
 #include <net/inet_connection_sock.h>
 #include <net/net_namespace.h>
@@ -17,20 +18,10 @@
 #include <net/ipv6.h>
 #include <net/dst.h>
 #include <net/tcp.h>
+#include <net/genetlink.h>
 
 int sysctl_tcp_nometrics_save __read_mostly;
 
-enum tcp_metric_index {
-	TCP_METRIC_RTT,
-	TCP_METRIC_RTTVAR,
-	TCP_METRIC_SSTHRESH,
-	TCP_METRIC_CWND,
-	TCP_METRIC_REORDERING,
-
-	/* Always last.  */
-	TCP_METRIC_MAX,
-};
-
 struct tcp_fastopen_metrics {
 	u16	mss;
 	u16	syn_loss:10;		/* Recurring Fast Open SYN losses */
@@ -45,8 +36,10 @@ struct tcp_metrics_block {
 	u32				tcpm_ts;
 	u32				tcpm_ts_stamp;
 	u32				tcpm_lock;
-	u32				tcpm_vals[TCP_METRIC_MAX];
+	u32				tcpm_vals[TCP_METRIC_MAX + 1];
 	struct tcp_fastopen_metrics	tcpm_fastopen;
+
+	struct rcu_head			rcu_head;
 };
 
 static bool tcp_metric_locked(struct tcp_metrics_block *tm,
@@ -690,6 +683,325 @@ void tcp_fastopen_cache_set(struct sock *sk, u16 mss,
 	rcu_read_unlock();
 }
 
+static struct genl_family tcp_metrics_nl_family = {
+	.id		= GENL_ID_GENERATE,
+	.hdrsize	= 0,
+	.name		= TCP_METRICS_GENL_NAME,
+	.version	= TCP_METRICS_GENL_VERSION,
+	.maxattr	= TCP_METRICS_ATTR_MAX,
+	.netnsok	= true,
+};
+
+static struct nla_policy tcp_metrics_nl_policy[TCP_METRICS_ATTR_MAX + 1] = {
+	[TCP_METRICS_ATTR_ADDR_IPV4]	= { .type = NLA_U32, },
+	[TCP_METRICS_ATTR_ADDR_IPV6]	= { .type = NLA_BINARY,
+					    .len = sizeof(struct in6_addr), },
+	/* Following attributes are not received for GET/DEL,
+	 * we keep them for reference
+	 */
+#if 0
+	[TCP_METRICS_ATTR_AGE]		= { .type = NLA_MSECS, },
+	[TCP_METRICS_ATTR_TW_TSVAL]	= { .type = NLA_U32, },
+	[TCP_METRICS_ATTR_TW_TS_STAMP]	= { .type = NLA_S32, },
+	[TCP_METRICS_ATTR_VALS]		= { .type = NLA_NESTED, },
+	[TCP_METRICS_ATTR_FOPEN_MSS]	= { .type = NLA_U16, },
+	[TCP_METRICS_ATTR_FOPEN_SYN_DROPS]	= { .type = NLA_U16, },
+	[TCP_METRICS_ATTR_FOPEN_SYN_DROP_TS]	= { .type = NLA_MSECS, },
+	[TCP_METRICS_ATTR_FOPEN_COOKIE]	= { .type = NLA_BINARY,
+					    .len = TCP_FASTOPEN_COOKIE_MAX, },
+#endif
+};
+
+/* Add attributes, caller cancels its header on failure */
+static int tcp_metrics_fill_info(struct sk_buff *msg,
+				 struct tcp_metrics_block *tm)
+{
+	struct nlattr *nest;
+	int i;
+
+	switch (tm->tcpm_addr.family) {
+	case AF_INET:
+		if (nla_put_be32(msg, TCP_METRICS_ATTR_ADDR_IPV4,
+				tm->tcpm_addr.addr.a4) < 0)
+			goto nla_put_failure;
+		break;
+	case AF_INET6:
+		if (nla_put(msg, TCP_METRICS_ATTR_ADDR_IPV6, 16,
+			    tm->tcpm_addr.addr.a6) < 0)
+			goto nla_put_failure;
+		break;
+	default:
+		return -EAFNOSUPPORT;
+	}
+
+	if (nla_put_msecs(msg, TCP_METRICS_ATTR_AGE,
+			  jiffies - tm->tcpm_stamp) < 0)
+		goto nla_put_failure;
+	if (tm->tcpm_ts_stamp) {
+		if (nla_put_s32(msg, TCP_METRICS_ATTR_TW_TS_STAMP,
+				(s32) (get_seconds() - tm->tcpm_ts_stamp)) < 0)
+			goto nla_put_failure;
+		if (nla_put_u32(msg, TCP_METRICS_ATTR_TW_TSVAL,
+				tm->tcpm_ts) < 0)
+			goto nla_put_failure;
+	}
+
+	{
+		int n = 0;
+
+		nest = nla_nest_start(msg, TCP_METRICS_ATTR_VALS);
+		if (!nest)
+			goto nla_put_failure;
+		for (i = 0; i < TCP_METRIC_MAX + 1; i++) {
+			if (!tm->tcpm_vals[i])
+				continue;
+			if (nla_put_u32(msg, i + 1, tm->tcpm_vals[i]) < 0)
+				goto nla_put_failure;
+			n++;
+		}
+		if (n)
+			nla_nest_end(msg, nest);
+		else
+			nla_nest_cancel(msg, nest);
+	}
+
+	{
+		struct tcp_fastopen_metrics tfom_copy[1], *tfom;
+		unsigned int seq;
+
+		do {
+			seq = read_seqbegin(&fastopen_seqlock);
+			tfom_copy[0] = tm->tcpm_fastopen;
+		} while (read_seqretry(&fastopen_seqlock, seq));
+
+		tfom = tfom_copy;
+		if (tfom->mss &&
+		    nla_put_u16(msg, TCP_METRICS_ATTR_FOPEN_MSS,
+				tfom->mss) < 0)
+			goto nla_put_failure;
+		if (tfom->syn_loss &&
+		    (nla_put_u16(msg, TCP_METRICS_ATTR_FOPEN_SYN_DROPS,
+				tfom->syn_loss) < 0 ||
+		     nla_put_msecs(msg, TCP_METRICS_ATTR_FOPEN_SYN_DROP_TS,
+				jiffies - tfom->last_syn_loss) < 0))
+			goto nla_put_failure;
+		if (tfom->cookie.len > 0 &&
+		    nla_put(msg, TCP_METRICS_ATTR_FOPEN_COOKIE,
+			    tfom->cookie.len, tfom->cookie.val) < 0)
+			goto nla_put_failure;
+	}
+
+	return 0;
+
+nla_put_failure:
+	return -EMSGSIZE;
+}
+
+static int tcp_metrics_dump_info(struct sk_buff *skb,
+				 struct netlink_callback *cb,
+				 struct tcp_metrics_block *tm)
+{
+	void *hdr;
+
+	hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
+			  &tcp_metrics_nl_family, NLM_F_MULTI,
+			  TCP_METRICS_CMD_GET);
+	if (!hdr)
+		return -EMSGSIZE;
+
+	if (tcp_metrics_fill_info(skb, tm) < 0)
+		goto nla_put_failure;
+
+	return genlmsg_end(skb, hdr);
+
+nla_put_failure:
+	genlmsg_cancel(skb, hdr);
+	return -EMSGSIZE;
+}
+
+static int tcp_metrics_nl_dump(struct sk_buff *skb,
+			       struct netlink_callback *cb)
+{
+	struct net *net = sock_net(skb->sk);
+	unsigned int max_rows = 1U << net->ipv4.tcp_metrics_hash_log;
+	unsigned int row, s_row = cb->args[0];
+	int s_col = cb->args[1], col = s_col;
+
+	for (row = s_row; row < max_rows; row++, s_col = 0) {
+		struct tcp_metrics_block *tm;
+		struct tcpm_hash_bucket *hb = net->ipv4.tcp_metrics_hash + row;
+
+		rcu_read_lock();
+		for (col = 0, tm = rcu_dereference(hb->chain); tm;
+		     tm = rcu_dereference(tm->tcpm_next), col++) {
+			if (col < s_col)
+				continue;
+			if (tcp_metrics_dump_info(skb, cb, tm) < 0) {
+				rcu_read_unlock();
+				goto done;
+			}
+		}
+		rcu_read_unlock();
+	}
+
+done:
+	cb->args[0] = row;
+	cb->args[1] = col;
+	return skb->len;
+}
+
+static int parse_nl_addr(struct genl_info *info, struct inetpeer_addr *addr,
+			 unsigned int *hash, int optional)
+{
+	struct nlattr *a;
+
+	a = info->attrs[TCP_METRICS_ATTR_ADDR_IPV4];
+	if (a) {
+		addr->family = AF_INET;
+		addr->addr.a4 = nla_get_be32(a);
+		*hash = (__force unsigned int) addr->addr.a4;
+		return 0;
+	}
+	a = info->attrs[TCP_METRICS_ATTR_ADDR_IPV6];
+	if (a) {
+		if (nla_len(a) != sizeof(sizeof(struct in6_addr)))
+			return -EINVAL;
+		addr->family = AF_INET6;
+		memcpy(addr->addr.a6, nla_data(a), sizeof(addr->addr.a6));
+		*hash = ipv6_addr_hash((struct in6_addr *) addr->addr.a6);
+		return 0;
+	}
+	return optional ? 1 : -EAFNOSUPPORT;
+}
+
+static int tcp_metrics_nl_cmd_get(struct sk_buff *skb, struct genl_info *info)
+{
+	struct tcp_metrics_block *tm;
+	struct inetpeer_addr addr;
+	unsigned int hash;
+	struct sk_buff *msg;
+	struct net *net = genl_info_net(info);
+	void *reply;
+	int ret;
+
+	ret = parse_nl_addr(info, &addr, &hash, 0);
+	if (ret < 0)
+		return ret;
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg)
+		return -ENOMEM;
+
+	reply = genlmsg_put_reply(msg, info, &tcp_metrics_nl_family, 0,
+				  info->genlhdr->cmd);
+	if (!reply)
+		goto nla_put_failure;
+
+	hash = hash_32(hash, net->ipv4.tcp_metrics_hash_log);
+	ret = -ESRCH;
+	rcu_read_lock();
+	for (tm = rcu_dereference(net->ipv4.tcp_metrics_hash[hash].chain); tm;
+	     tm = rcu_dereference(tm->tcpm_next)) {
+		if (addr_same(&tm->tcpm_addr, &addr)) {
+			ret = tcp_metrics_fill_info(msg, tm);
+			break;
+		}
+	}
+	rcu_read_unlock();
+	if (ret < 0)
+		goto out_free;
+
+	genlmsg_end(msg, reply);
+	return genlmsg_reply(msg, info);
+
+nla_put_failure:
+	ret = -EMSGSIZE;
+
+out_free:
+	nlmsg_free(msg);
+	return ret;
+}
+
+#define deref_locked_genl(p)	\
+	rcu_dereference_protected(p, lockdep_genl_is_held() && \
+				     lockdep_is_held(&tcp_metrics_lock))
+
+#define deref_genl(p)	rcu_dereference_protected(p, lockdep_genl_is_held())
+
+static int tcp_metrics_flush_all(struct net *net)
+{
+	unsigned int max_rows = 1U << net->ipv4.tcp_metrics_hash_log;
+	struct tcpm_hash_bucket *hb = net->ipv4.tcp_metrics_hash;
+	struct tcp_metrics_block *tm;
+	unsigned int row;
+
+	for (row = 0; row < max_rows; row++, hb++) {
+		spin_lock_bh(&tcp_metrics_lock);
+		tm = deref_locked_genl(hb->chain);
+		if (tm)
+			hb->chain = NULL;
+		spin_unlock_bh(&tcp_metrics_lock);
+		while (tm) {
+			struct tcp_metrics_block *next;
+
+			next = deref_genl(tm->tcpm_next);
+			kfree_rcu(tm, rcu_head);
+			tm = next;
+		}
+	}
+	return 0;
+}
+
+static int tcp_metrics_nl_cmd_del(struct sk_buff *skb, struct genl_info *info)
+{
+	struct tcpm_hash_bucket *hb;
+	struct tcp_metrics_block *tm;
+	struct tcp_metrics_block __rcu **pp;
+	struct inetpeer_addr addr;
+	unsigned int hash;
+	struct net *net = genl_info_net(info);
+	int ret;
+
+	ret = parse_nl_addr(info, &addr, &hash, 1);
+	if (ret < 0)
+		return ret;
+	if (ret > 0)
+		return tcp_metrics_flush_all(net);
+
+	hash = hash_32(hash, net->ipv4.tcp_metrics_hash_log);
+	hb = net->ipv4.tcp_metrics_hash + hash;
+	pp = &hb->chain;
+	spin_lock_bh(&tcp_metrics_lock);
+	for (tm = deref_locked_genl(*pp); tm;
+	     pp = &tm->tcpm_next, tm = deref_locked_genl(*pp)) {
+		if (addr_same(&tm->tcpm_addr, &addr)) {
+			*pp = tm->tcpm_next;
+			break;
+		}
+	}
+	spin_unlock_bh(&tcp_metrics_lock);
+	if (!tm)
+		return -ESRCH;
+	kfree_rcu(tm, rcu_head);
+	return 0;
+}
+
+static struct genl_ops tcp_metrics_nl_ops[] = {
+	{
+		.cmd = TCP_METRICS_CMD_GET,
+		.doit = tcp_metrics_nl_cmd_get,
+		.dumpit = tcp_metrics_nl_dump,
+		.policy = tcp_metrics_nl_policy,
+		.flags = GENL_ADMIN_PERM,
+	},
+	{
+		.cmd = TCP_METRICS_CMD_DEL,
+		.doit = tcp_metrics_nl_cmd_del,
+		.policy = tcp_metrics_nl_policy,
+		.flags = GENL_ADMIN_PERM,
+	},
+};
+
 static unsigned int tcpmhash_entries;
 static int __init set_tcpmhash_entries(char *str)
 {
@@ -753,5 +1065,21 @@ static __net_initdata struct pernet_operations tcp_net_metrics_ops = {
 
 void __init tcp_metrics_init(void)
 {
-	register_pernet_subsys(&tcp_net_metrics_ops);
+	int ret;
+
+	ret = register_pernet_subsys(&tcp_net_metrics_ops);
+	if (ret < 0)
+		goto cleanup;
+	ret = genl_register_family_with_ops(&tcp_metrics_nl_family,
+					    tcp_metrics_nl_ops,
+					    ARRAY_SIZE(tcp_metrics_nl_ops));
+	if (ret < 0)
+		goto cleanup_subsys;
+	return;
+
+cleanup_subsys:
+	unregister_pernet_subsys(&tcp_net_metrics_ops);
+
+cleanup:
+	return;
 }
-- 
cgit v1.2.3


From 23d3b8bfb8eb20e7d96afa09991e6a5ed1c83164 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 5 Sep 2012 01:02:56 +0000
Subject: net: qdisc busylock needs lockdep annotations

It seems we need to provide ability for stacked devices
to use specific lock_class_key for sch->busylock

We could instead default l2tpeth tx_queue_len to 0 (no qdisc), but
a user might use a qdisc anyway.

(So same fixes are probably needed on non LLTX stacked drivers)

Noticed while stressing L2TPV3 setup :

======================================================
 [ INFO: possible circular locking dependency detected ]
 3.6.0-rc3+ #788 Not tainted
 -------------------------------------------------------
 netperf/4660 is trying to acquire lock:
  (l2tpsock){+.-...}, at: [<ffffffffa0208db2>] l2tp_xmit_skb+0x172/0xa50 [l2tp_core]

 but task is already holding lock:
  (&(&sch->busylock)->rlock){+.-...}, at: [<ffffffff81596595>] dev_queue_xmit+0xd75/0xe00

 which lock already depends on the new lock.

 the existing dependency chain (in reverse order) is:

 -> #1 (&(&sch->busylock)->rlock){+.-...}:
        [<ffffffff810a5df0>] lock_acquire+0x90/0x200
        [<ffffffff817499fc>] _raw_spin_lock_irqsave+0x4c/0x60
        [<ffffffff81074872>] __wake_up+0x32/0x70
        [<ffffffff8136d39e>] tty_wakeup+0x3e/0x80
        [<ffffffff81378fb3>] pty_write+0x73/0x80
        [<ffffffff8136cb4c>] tty_put_char+0x3c/0x40
        [<ffffffff813722b2>] process_echoes+0x142/0x330
        [<ffffffff813742ab>] n_tty_receive_buf+0x8fb/0x1230
        [<ffffffff813777b2>] flush_to_ldisc+0x142/0x1c0
        [<ffffffff81062818>] process_one_work+0x198/0x760
        [<ffffffff81063236>] worker_thread+0x186/0x4b0
        [<ffffffff810694d3>] kthread+0x93/0xa0
        [<ffffffff81753e24>] kernel_thread_helper+0x4/0x10

 -> #0 (l2tpsock){+.-...}:
        [<ffffffff810a5288>] __lock_acquire+0x1628/0x1b10
        [<ffffffff810a5df0>] lock_acquire+0x90/0x200
        [<ffffffff817498c1>] _raw_spin_lock+0x41/0x50
        [<ffffffffa0208db2>] l2tp_xmit_skb+0x172/0xa50 [l2tp_core]
        [<ffffffffa021a802>] l2tp_eth_dev_xmit+0x32/0x60 [l2tp_eth]
        [<ffffffff815952b2>] dev_hard_start_xmit+0x502/0xa70
        [<ffffffff815b63ce>] sch_direct_xmit+0xfe/0x290
        [<ffffffff81595a05>] dev_queue_xmit+0x1e5/0xe00
        [<ffffffff815d9d60>] ip_finish_output+0x3d0/0x890
        [<ffffffff815db019>] ip_output+0x59/0xf0
        [<ffffffff815da36d>] ip_local_out+0x2d/0xa0
        [<ffffffff815da5a3>] ip_queue_xmit+0x1c3/0x680
        [<ffffffff815f4192>] tcp_transmit_skb+0x402/0xa60
        [<ffffffff815f4a94>] tcp_write_xmit+0x1f4/0xa30
        [<ffffffff815f5300>] tcp_push_one+0x30/0x40
        [<ffffffff815e6672>] tcp_sendmsg+0xe82/0x1040
        [<ffffffff81614495>] inet_sendmsg+0x125/0x230
        [<ffffffff81576cdc>] sock_sendmsg+0xdc/0xf0
        [<ffffffff81579ece>] sys_sendto+0xfe/0x130
        [<ffffffff81752c92>] system_call_fastpath+0x16/0x1b
  Possible unsafe locking scenario:

        CPU0                    CPU1
        ----                    ----
   lock(&(&sch->busylock)->rlock);
                                lock(l2tpsock);
                                lock(&(&sch->busylock)->rlock);
   lock(l2tpsock);

  *** DEADLOCK ***

 5 locks held by netperf/4660:
  #0:  (sk_lock-AF_INET){+.+.+.}, at: [<ffffffff815e581c>] tcp_sendmsg+0x2c/0x1040
  #1:  (rcu_read_lock){.+.+..}, at: [<ffffffff815da3e0>] ip_queue_xmit+0x0/0x680
  #2:  (rcu_read_lock_bh){.+....}, at: [<ffffffff815d9ac5>] ip_finish_output+0x135/0x890
  #3:  (rcu_read_lock_bh){.+....}, at: [<ffffffff81595820>] dev_queue_xmit+0x0/0xe00
  #4:  (&(&sch->busylock)->rlock){+.-...}, at: [<ffffffff81596595>] dev_queue_xmit+0xd75/0xe00

 stack backtrace:
 Pid: 4660, comm: netperf Not tainted 3.6.0-rc3+ #788
 Call Trace:
  [<ffffffff8173dbf8>] print_circular_bug+0x1fb/0x20c
  [<ffffffff810a5288>] __lock_acquire+0x1628/0x1b10
  [<ffffffff810a334b>] ? check_usage+0x9b/0x4d0
  [<ffffffff810a3f44>] ? __lock_acquire+0x2e4/0x1b10
  [<ffffffff810a5df0>] lock_acquire+0x90/0x200
  [<ffffffffa0208db2>] ? l2tp_xmit_skb+0x172/0xa50 [l2tp_core]
  [<ffffffff817498c1>] _raw_spin_lock+0x41/0x50
  [<ffffffffa0208db2>] ? l2tp_xmit_skb+0x172/0xa50 [l2tp_core]
  [<ffffffffa0208db2>] l2tp_xmit_skb+0x172/0xa50 [l2tp_core]
  [<ffffffffa021a802>] l2tp_eth_dev_xmit+0x32/0x60 [l2tp_eth]
  [<ffffffff815952b2>] dev_hard_start_xmit+0x502/0xa70
  [<ffffffff81594e0e>] ? dev_hard_start_xmit+0x5e/0xa70
  [<ffffffff81595961>] ? dev_queue_xmit+0x141/0xe00
  [<ffffffff815b63ce>] sch_direct_xmit+0xfe/0x290
  [<ffffffff81595a05>] dev_queue_xmit+0x1e5/0xe00
  [<ffffffff81595820>] ? dev_hard_start_xmit+0xa70/0xa70
  [<ffffffff815d9d60>] ip_finish_output+0x3d0/0x890
  [<ffffffff815d9ac5>] ? ip_finish_output+0x135/0x890
  [<ffffffff815db019>] ip_output+0x59/0xf0
  [<ffffffff815da36d>] ip_local_out+0x2d/0xa0
  [<ffffffff815da5a3>] ip_queue_xmit+0x1c3/0x680
  [<ffffffff815da3e0>] ? ip_local_out+0xa0/0xa0
  [<ffffffff815f4192>] tcp_transmit_skb+0x402/0xa60
  [<ffffffff815fa25e>] ? tcp_md5_do_lookup+0x18e/0x1a0
  [<ffffffff815f4a94>] tcp_write_xmit+0x1f4/0xa30
  [<ffffffff815f5300>] tcp_push_one+0x30/0x40
  [<ffffffff815e6672>] tcp_sendmsg+0xe82/0x1040
  [<ffffffff81614495>] inet_sendmsg+0x125/0x230
  [<ffffffff81614370>] ? inet_create+0x6b0/0x6b0
  [<ffffffff8157e6e2>] ? sock_update_classid+0xc2/0x3b0
  [<ffffffff8157e750>] ? sock_update_classid+0x130/0x3b0
  [<ffffffff81576cdc>] sock_sendmsg+0xdc/0xf0
  [<ffffffff81162579>] ? fget_light+0x3f9/0x4f0
  [<ffffffff81579ece>] sys_sendto+0xfe/0x130
  [<ffffffff810a69ad>] ? trace_hardirqs_on+0xd/0x10
  [<ffffffff8174a0b0>] ? _raw_spin_unlock_irq+0x30/0x50
  [<ffffffff810757e3>] ? finish_task_switch+0x83/0xf0
  [<ffffffff810757a6>] ? finish_task_switch+0x46/0xf0
  [<ffffffff81752cb7>] ? sysret_check+0x1b/0x56
  [<ffffffff81752c92>] system_call_fastpath+0x16/0x1b

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 2 ++
 net/l2tp/l2tp_eth.c       | 3 ++-
 net/sched/sch_generic.c   | 9 ++++++++-
 3 files changed, 12 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index ccac82e61604..ae3153c0db0a 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1322,6 +1322,8 @@ struct net_device {
 	/* phy device may attach itself for hardware timestamping */
 	struct phy_device *phydev;
 
+	struct lock_class_key *qdisc_tx_busylock;
+
 	/* group the device belongs to */
 	int group;
 
diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c
index f9ee74deeac2..ba89997bcd2e 100644
--- a/net/l2tp/l2tp_eth.c
+++ b/net/l2tp/l2tp_eth.c
@@ -67,6 +67,7 @@ static inline struct l2tp_eth_net *l2tp_eth_pernet(struct net *net)
 	return net_generic(net, l2tp_eth_net_id);
 }
 
+static struct lock_class_key l2tp_eth_tx_busylock;
 static int l2tp_eth_dev_init(struct net_device *dev)
 {
 	struct l2tp_eth *priv = netdev_priv(dev);
@@ -74,7 +75,7 @@ static int l2tp_eth_dev_init(struct net_device *dev)
 	priv->dev = dev;
 	eth_hw_addr_random(dev);
 	memset(&dev->broadcast[0], 0xff, 6);
-
+	dev->qdisc_tx_busylock = &l2tp_eth_tx_busylock;
 	return 0;
 }
 
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 6c4d5fe53ce8..aefc1504dc88 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -527,6 +527,8 @@ struct Qdisc_ops pfifo_fast_ops __read_mostly = {
 };
 EXPORT_SYMBOL(pfifo_fast_ops);
 
+static struct lock_class_key qdisc_tx_busylock;
+
 struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
 			  struct Qdisc_ops *ops)
 {
@@ -534,6 +536,7 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
 	struct Qdisc *sch;
 	unsigned int size = QDISC_ALIGN(sizeof(*sch)) + ops->priv_size;
 	int err = -ENOBUFS;
+	struct net_device *dev = dev_queue->dev;
 
 	p = kzalloc_node(size, GFP_KERNEL,
 			 netdev_queue_numa_node_read(dev_queue));
@@ -553,12 +556,16 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
 	}
 	INIT_LIST_HEAD(&sch->list);
 	skb_queue_head_init(&sch->q);
+
 	spin_lock_init(&sch->busylock);
+	lockdep_set_class(&sch->busylock,
+			  dev->qdisc_tx_busylock ?: &qdisc_tx_busylock);
+
 	sch->ops = ops;
 	sch->enqueue = ops->enqueue;
 	sch->dequeue = ops->dequeue;
 	sch->dev_queue = dev_queue;
-	dev_hold(qdisc_dev(sch));
+	dev_hold(dev);
 	atomic_set(&sch->refcnt, 1);
 
 	return sch;
-- 
cgit v1.2.3


From ef2c7d7b59708d54213c7556a82d14de9a7e4475 Mon Sep 17 00:00:00 2001
From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Date: Wed, 5 Sep 2012 02:12:42 +0000
Subject: ipv6: fix handling of blackhole and prohibit routes

When adding a blackhole or a prohibit route, they were handling like classic
routes. Moreover, it was only possible to add this kind of routes by specifying
an interface.

Bug already reported here:
  http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=498498

Before the patch:
  $ ip route add blackhole 2001::1/128
  RTNETLINK answers: No such device
  $ ip route add blackhole 2001::1/128 dev eth0
  $ ip -6 route | grep 2001
  2001::1 dev eth0  metric 1024

After:
  $ ip route add blackhole 2001::1/128
  $ ip -6 route | grep 2001
  blackhole 2001::1 dev lo  metric 1024  error -22

v2: wrong patch
v3: add a field fc_type in struct fib6_config to store RTN_* type

Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip6_fib.h |  1 +
 net/ipv6/route.c      | 32 ++++++++++++++++++++++++++++----
 2 files changed, 29 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index 0fedbd8d747a..cd64cf35a49f 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -37,6 +37,7 @@ struct fib6_config {
 	int		fc_ifindex;
 	u32		fc_flags;
 	u32		fc_protocol;
+	u32		fc_type;	/* only 8 bits are used */
 
 	struct in6_addr	fc_dst;
 	struct in6_addr	fc_src;
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 0ddf2d132e7f..fa264447a751 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1463,8 +1463,18 @@ int ip6_route_add(struct fib6_config *cfg)
 		}
 		rt->dst.output = ip6_pkt_discard_out;
 		rt->dst.input = ip6_pkt_discard;
-		rt->dst.error = -ENETUNREACH;
 		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
+		switch (cfg->fc_type) {
+		case RTN_BLACKHOLE:
+			rt->dst.error = -EINVAL;
+			break;
+		case RTN_PROHIBIT:
+			rt->dst.error = -EACCES;
+			break;
+		default:
+			rt->dst.error = -ENETUNREACH;
+			break;
+		}
 		goto install_route;
 	}
 
@@ -2261,8 +2271,11 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
 	cfg->fc_src_len = rtm->rtm_src_len;
 	cfg->fc_flags = RTF_UP;
 	cfg->fc_protocol = rtm->rtm_protocol;
+	cfg->fc_type = rtm->rtm_type;
 
-	if (rtm->rtm_type == RTN_UNREACHABLE)
+	if (rtm->rtm_type == RTN_UNREACHABLE ||
+	    rtm->rtm_type == RTN_BLACKHOLE ||
+	    rtm->rtm_type == RTN_PROHIBIT)
 		cfg->fc_flags |= RTF_REJECT;
 
 	if (rtm->rtm_type == RTN_LOCAL)
@@ -2391,8 +2404,19 @@ static int rt6_fill_node(struct net *net,
 	rtm->rtm_table = table;
 	if (nla_put_u32(skb, RTA_TABLE, table))
 		goto nla_put_failure;
-	if (rt->rt6i_flags & RTF_REJECT)
-		rtm->rtm_type = RTN_UNREACHABLE;
+	if (rt->rt6i_flags & RTF_REJECT) {
+		switch (rt->dst.error) {
+		case -EINVAL:
+			rtm->rtm_type = RTN_BLACKHOLE;
+			break;
+		case -EACCES:
+			rtm->rtm_type = RTN_PROHIBIT;
+			break;
+		default:
+			rtm->rtm_type = RTN_UNREACHABLE;
+			break;
+		}
+	}
 	else if (rt->rt6i_flags & RTF_LOCAL)
 		rtm->rtm_type = RTN_LOCAL;
 	else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
-- 
cgit v1.2.3


From ad0ed62f340a03acca5ae3e7430a08bf289aaffe Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Tue, 24 Jul 2012 17:38:43 +0200
Subject: wireless: remove obsolete chan no/center freq conversion functions

There are a number of functions that shouldn't really
be used when modern functions that take the band are
available in cfg80211. Remove these, but for now keep
 * ieee80211_freq_to_dsss_chan and
 * ieee80211_dsss_chan_to_freq
as they're used in older drivers.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/linux/ieee80211.h | 80 -----------------------------------------------
 1 file changed, 80 deletions(-)

(limited to 'include')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index e02fc682bb68..2385119f8bb0 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -1934,36 +1934,6 @@ static inline bool ieee80211_is_public_action(struct ieee80211_hdr *hdr,
 	return mgmt->u.action.category == WLAN_CATEGORY_PUBLIC;
 }
 
-/**
- * ieee80211_fhss_chan_to_freq - get channel frequency
- * @channel: the FHSS channel
- *
- * Convert IEEE802.11 FHSS channel to frequency (MHz)
- * Ref IEEE 802.11-2007 section 14.6
- */
-static inline int ieee80211_fhss_chan_to_freq(int channel)
-{
-	if ((channel > 1) && (channel < 96))
-		return channel + 2400;
-	else
-		return -1;
-}
-
-/**
- * ieee80211_freq_to_fhss_chan - get channel
- * @freq: the channels frequency
- *
- * Convert frequency (MHz) to IEEE802.11 FHSS channel
- * Ref IEEE 802.11-2007 section 14.6
- */
-static inline int ieee80211_freq_to_fhss_chan(int freq)
-{
-	if ((freq > 2401) && (freq < 2496))
-		return freq - 2400;
-	else
-		return -1;
-}
-
 /**
  * ieee80211_dsss_chan_to_freq - get channel center frequency
  * @channel: the DSSS channel
@@ -2000,56 +1970,6 @@ static inline int ieee80211_freq_to_dsss_chan(int freq)
 		return -1;
 }
 
-/* Convert IEEE802.11 HR DSSS channel to frequency (MHz) and back
- * Ref IEEE 802.11-2007 section 18.4.6.2
- *
- * The channels and frequencies are the same as those defined for DSSS
- */
-#define ieee80211_hr_chan_to_freq(chan) ieee80211_dsss_chan_to_freq(chan)
-#define ieee80211_freq_to_hr_chan(freq) ieee80211_freq_to_dsss_chan(freq)
-
-/* Convert IEEE802.11 ERP channel to frequency (MHz) and back
- * Ref IEEE 802.11-2007 section 19.4.2
- */
-#define ieee80211_erp_chan_to_freq(chan) ieee80211_hr_chan_to_freq(chan)
-#define ieee80211_freq_to_erp_chan(freq) ieee80211_freq_to_hr_chan(freq)
-
-/**
- * ieee80211_ofdm_chan_to_freq - get channel center frequency
- * @s_freq: starting frequency == (dotChannelStartingFactor/2) MHz
- * @channel: the OFDM channel
- *
- * Convert IEEE802.11 OFDM channel to center frequency (MHz)
- * Ref IEEE 802.11-2007 section 17.3.8.3.2
- */
-static inline int ieee80211_ofdm_chan_to_freq(int s_freq, int channel)
-{
-	if ((channel > 0) && (channel <= 200) &&
-	    (s_freq >= 4000))
-		return s_freq + (channel * 5);
-	else
-		return -1;
-}
-
-/**
- * ieee80211_freq_to_ofdm_channel - get channel
- * @s_freq: starting frequency == (dotChannelStartingFactor/2) MHz
- * @freq: the frequency
- *
- * Convert frequency (MHz) to IEEE802.11 OFDM channel
- * Ref IEEE 802.11-2007 section 17.3.8.3.2
- *
- * This routine selects the channel with the closest center frequency.
- */
-static inline int ieee80211_freq_to_ofdm_chan(int s_freq, int freq)
-{
-	if ((freq > (s_freq + 2)) && (freq <= (s_freq + 1202)) &&
-	    (s_freq >= 4000))
-		return (freq + 2 - s_freq) / 5;
-	else
-		return -1;
-}
-
 /**
  * ieee80211_tu_to_usec - convert time units (TU) to microseconds
  * @tu: the TUs
-- 
cgit v1.2.3


From dbe9a4173ea53b72b2c35d19f676a85b69f1c9fe Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Thu, 6 Sep 2012 18:20:01 +0000
Subject: scm: Don't use struct ucred in NETLINK_CB and struct scm_cookie.

Passing uids and gids on NETLINK_CB from a process in one user
namespace to a process in another user namespace can result in the
wrong uid or gid being presented to userspace.  Avoid that problem by
passing kuids and kgids instead.

- define struct scm_creds for use in scm_cookie and netlink_skb_parms
  that holds uid and gid information in kuid_t and kgid_t.

- Modify scm_set_cred to fill out scm_creds by heand instead of using
  cred_to_ucred to fill out struct ucred.  This conversion ensures
  userspace does not get incorrect uid or gid values to look at.

- Modify scm_recv to convert from struct scm_creds to struct ucred
  before copying credential values to userspace.

- Modify __scm_send to populate struct scm_creds on in the scm_cookie,
  instead of just copying struct ucred from userspace.

- Modify netlink_sendmsg to copy scm_creds instead of struct ucred
  into the NETLINK_CB.

Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netlink.h  |  3 ++-
 include/net/scm.h        | 23 +++++++++++++++++++----
 net/core/scm.c           | 17 +++++++++++------
 net/netlink/af_netlink.c |  2 +-
 4 files changed, 33 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/include/linux/netlink.h b/include/linux/netlink.h
index c9fdde2bc73f..df73cf4b0290 100644
--- a/include/linux/netlink.h
+++ b/include/linux/netlink.h
@@ -153,6 +153,7 @@ struct nlattr {
 
 #include <linux/capability.h>
 #include <linux/skbuff.h>
+#include <net/scm.h>
 
 struct net;
 
@@ -162,7 +163,7 @@ static inline struct nlmsghdr *nlmsg_hdr(const struct sk_buff *skb)
 }
 
 struct netlink_skb_parms {
-	struct ucred		creds;		/* Skb credentials	*/
+	struct scm_creds	creds;		/* Skb credentials	*/
 	__u32			pid;
 	__u32			dst_group;
 	struct sock		*ssk;
diff --git a/include/net/scm.h b/include/net/scm.h
index 7dc0854f0b38..456695f5cbc4 100644
--- a/include/net/scm.h
+++ b/include/net/scm.h
@@ -12,6 +12,12 @@
  */
 #define SCM_MAX_FD	253
 
+struct scm_creds {
+	u32	pid;
+	kuid_t	uid;
+	kgid_t	gid;
+};
+
 struct scm_fp_list {
 	short			count;
 	short			max;
@@ -22,7 +28,7 @@ struct scm_cookie {
 	struct pid		*pid;		/* Skb credentials */
 	const struct cred	*cred;
 	struct scm_fp_list	*fp;		/* Passed files		*/
-	struct ucred		creds;		/* Skb credentials	*/
+	struct scm_creds	creds;		/* Skb credentials	*/
 #ifdef CONFIG_SECURITY_NETWORK
 	u32			secid;		/* Passed security ID 	*/
 #endif
@@ -49,7 +55,9 @@ static __inline__ void scm_set_cred(struct scm_cookie *scm,
 {
 	scm->pid  = get_pid(pid);
 	scm->cred = cred ? get_cred(cred) : NULL;
-	cred_to_ucred(pid, cred, &scm->creds);
+	scm->creds.pid = pid_vnr(pid);
+	scm->creds.uid = cred ? cred->euid : INVALID_UID;
+	scm->creds.gid = cred ? cred->egid : INVALID_GID;
 }
 
 static __inline__ void scm_destroy_cred(struct scm_cookie *scm)
@@ -112,8 +120,15 @@ static __inline__ void scm_recv(struct socket *sock, struct msghdr *msg,
 		return;
 	}
 
-	if (test_bit(SOCK_PASSCRED, &sock->flags))
-		put_cmsg(msg, SOL_SOCKET, SCM_CREDENTIALS, sizeof(scm->creds), &scm->creds);
+	if (test_bit(SOCK_PASSCRED, &sock->flags)) {
+		struct user_namespace *current_ns = current_user_ns();
+		struct ucred ucreds = {
+			.pid = scm->creds.pid,
+			.uid = from_kuid_munged(current_ns, scm->creds.uid),
+			.gid = from_kgid_munged(current_ns, scm->creds.gid),
+		};
+		put_cmsg(msg, SOL_SOCKET, SCM_CREDENTIALS, sizeof(ucreds), &ucreds);
+	}
 
 	scm_destroy_cred(scm);
 
diff --git a/net/core/scm.c b/net/core/scm.c
index 6ab491d6c26f..9c1c63da3ca8 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -155,19 +155,21 @@ int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p)
 			break;
 		case SCM_CREDENTIALS:
 		{
+			struct ucred creds;
 			kuid_t uid;
 			kgid_t gid;
 			if (cmsg->cmsg_len != CMSG_LEN(sizeof(struct ucred)))
 				goto error;
-			memcpy(&p->creds, CMSG_DATA(cmsg), sizeof(struct ucred));
-			err = scm_check_creds(&p->creds);
+			memcpy(&creds, CMSG_DATA(cmsg), sizeof(struct ucred));
+			err = scm_check_creds(&creds);
 			if (err)
 				goto error;
 
-			if (!p->pid || pid_vnr(p->pid) != p->creds.pid) {
+			p->creds.pid = creds.pid;
+			if (!p->pid || pid_vnr(p->pid) != creds.pid) {
 				struct pid *pid;
 				err = -ESRCH;
-				pid = find_get_pid(p->creds.pid);
+				pid = find_get_pid(creds.pid);
 				if (!pid)
 					goto error;
 				put_pid(p->pid);
@@ -175,11 +177,14 @@ int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p)
 			}
 
 			err = -EINVAL;
-			uid = make_kuid(current_user_ns(), p->creds.uid);
-			gid = make_kgid(current_user_ns(), p->creds.gid);
+			uid = make_kuid(current_user_ns(), creds.uid);
+			gid = make_kgid(current_user_ns(), creds.gid);
 			if (!uid_valid(uid) || !gid_valid(gid))
 				goto error;
 
+			p->creds.uid = uid;
+			p->creds.gid = gid;
+
 			if (!p->cred ||
 			    !uid_eq(p->cred->euid, uid) ||
 			    !gid_eq(p->cred->egid, gid)) {
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 382119917166..f530b1ca1773 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -1399,7 +1399,7 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock,
 
 	NETLINK_CB(skb).pid	= nlk->pid;
 	NETLINK_CB(skb).dst_group = dst_group;
-	memcpy(NETLINK_CREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
+	NETLINK_CB(skb).creds	= siocb->scm->creds;
 
 	err = -EFAULT;
 	if (memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len)) {
-- 
cgit v1.2.3


From 4ccfe6d4109252dfadcd6885f33ed600ee03dbf8 Mon Sep 17 00:00:00 2001
From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Date: Fri, 7 Sep 2012 00:45:29 +0000
Subject: ipv4/route: arg delay is useless in rt_cache_flush()

Since route cache deletion (89aef8921bfbac22f), delay is no
more used. Remove it.

Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/route.h     |  2 +-
 net/ipv4/arp.c          |  2 +-
 net/ipv4/devinet.c      |  6 +++---
 net/ipv4/fib_frontend.c | 20 ++++++++++----------
 net/ipv4/fib_rules.c    |  2 +-
 net/ipv4/fib_trie.c     |  6 +++---
 net/ipv4/route.c        | 19 +++----------------
 7 files changed, 22 insertions(+), 35 deletions(-)

(limited to 'include')

diff --git a/include/net/route.h b/include/net/route.h
index 776a27f1ab78..da22243d2760 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -108,7 +108,7 @@ extern struct ip_rt_acct __percpu *ip_rt_acct;
 
 struct in_device;
 extern int		ip_rt_init(void);
-extern void		rt_cache_flush(struct net *net, int how);
+extern void		rt_cache_flush(struct net *net);
 extern void		rt_flush_dev(struct net_device *dev);
 extern struct rtable *__ip_route_output_key(struct net *, struct flowi4 *flp);
 extern struct rtable *ip_route_output_flow(struct net *, struct flowi4 *flp,
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 77e87aff419a..47800459e4cb 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -1225,7 +1225,7 @@ static int arp_netdev_event(struct notifier_block *this, unsigned long event,
 	switch (event) {
 	case NETDEV_CHANGEADDR:
 		neigh_changeaddr(&arp_tbl, dev);
-		rt_cache_flush(dev_net(dev), 0);
+		rt_cache_flush(dev_net(dev));
 		break;
 	default:
 		break;
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index adf273f8ad2e..f14eff506743 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1500,7 +1500,7 @@ static int devinet_conf_proc(ctl_table *ctl, int write,
 		if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
 		    i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
 			if ((new_value == 0) && (old_value != 0))
-				rt_cache_flush(net, 0);
+				rt_cache_flush(net);
 	}
 
 	return ret;
@@ -1534,7 +1534,7 @@ static int devinet_sysctl_forward(ctl_table *ctl, int write,
 				dev_disable_lro(idev->dev);
 			}
 			rtnl_unlock();
-			rt_cache_flush(net, 0);
+			rt_cache_flush(net);
 		}
 	}
 
@@ -1551,7 +1551,7 @@ static int ipv4_doint_and_flush(ctl_table *ctl, int write,
 	struct net *net = ctl->extra2;
 
 	if (write && *valp != val)
-		rt_cache_flush(net, 0);
+		rt_cache_flush(net);
 
 	return ret;
 }
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index acdee325d972..8a7cd795a96e 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -148,7 +148,7 @@ static void fib_flush(struct net *net)
 	}
 
 	if (flushed)
-		rt_cache_flush(net, -1);
+		rt_cache_flush(net);
 }
 
 /*
@@ -999,11 +999,11 @@ static void nl_fib_lookup_exit(struct net *net)
 	net->ipv4.fibnl = NULL;
 }
 
-static void fib_disable_ip(struct net_device *dev, int force, int delay)
+static void fib_disable_ip(struct net_device *dev, int force)
 {
 	if (fib_sync_down_dev(dev, force))
 		fib_flush(dev_net(dev));
-	rt_cache_flush(dev_net(dev), delay);
+	rt_cache_flush(dev_net(dev));
 	arp_ifdown(dev);
 }
 
@@ -1020,7 +1020,7 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event,
 		fib_sync_up(dev);
 #endif
 		atomic_inc(&net->ipv4.dev_addr_genid);
-		rt_cache_flush(dev_net(dev), -1);
+		rt_cache_flush(dev_net(dev));
 		break;
 	case NETDEV_DOWN:
 		fib_del_ifaddr(ifa, NULL);
@@ -1029,9 +1029,9 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event,
 			/* Last address was deleted from this interface.
 			 * Disable IP.
 			 */
-			fib_disable_ip(dev, 1, 0);
+			fib_disable_ip(dev, 1);
 		} else {
-			rt_cache_flush(dev_net(dev), -1);
+			rt_cache_flush(dev_net(dev));
 		}
 		break;
 	}
@@ -1045,7 +1045,7 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo
 	struct net *net = dev_net(dev);
 
 	if (event == NETDEV_UNREGISTER) {
-		fib_disable_ip(dev, 2, -1);
+		fib_disable_ip(dev, 2);
 		rt_flush_dev(dev);
 		return NOTIFY_DONE;
 	}
@@ -1061,14 +1061,14 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo
 		fib_sync_up(dev);
 #endif
 		atomic_inc(&net->ipv4.dev_addr_genid);
-		rt_cache_flush(net, -1);
+		rt_cache_flush(net);
 		break;
 	case NETDEV_DOWN:
-		fib_disable_ip(dev, 0, 0);
+		fib_disable_ip(dev, 0);
 		break;
 	case NETDEV_CHANGEMTU:
 	case NETDEV_CHANGE:
-		rt_cache_flush(net, 0);
+		rt_cache_flush(net);
 		break;
 	}
 	return NOTIFY_DONE;
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index a83d74e498d2..274309d3aded 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -259,7 +259,7 @@ static size_t fib4_rule_nlmsg_payload(struct fib_rule *rule)
 
 static void fib4_rule_flush_cache(struct fib_rules_ops *ops)
 {
-	rt_cache_flush(ops->fro_net, -1);
+	rt_cache_flush(ops->fro_net);
 }
 
 static const struct fib_rules_ops __net_initdata fib4_rules_ops_template = {
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 3c820dae235e..8d766106b540 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1286,7 +1286,7 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
 
 			fib_release_info(fi_drop);
 			if (state & FA_S_ACCESSED)
-				rt_cache_flush(cfg->fc_nlinfo.nl_net, -1);
+				rt_cache_flush(cfg->fc_nlinfo.nl_net);
 			rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen,
 				tb->tb_id, &cfg->fc_nlinfo, NLM_F_REPLACE);
 
@@ -1333,7 +1333,7 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
 	list_add_tail_rcu(&new_fa->fa_list,
 			  (fa ? &fa->fa_list : fa_head));
 
-	rt_cache_flush(cfg->fc_nlinfo.nl_net, -1);
+	rt_cache_flush(cfg->fc_nlinfo.nl_net);
 	rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id,
 		  &cfg->fc_nlinfo, 0);
 succeeded:
@@ -1711,7 +1711,7 @@ int fib_table_delete(struct fib_table *tb, struct fib_config *cfg)
 		trie_leaf_remove(t, l);
 
 	if (fa->fa_state & FA_S_ACCESSED)
-		rt_cache_flush(cfg->fc_nlinfo.nl_net, -1);
+		rt_cache_flush(cfg->fc_nlinfo.nl_net);
 
 	fib_release_info(fa->fa_info);
 	alias_free_mem_rcu(fa);
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index dc9549b5eb1c..ab1e7c7c38fc 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -461,11 +461,7 @@ static void rt_cache_invalidate(struct net *net)
 	atomic_add(shuffle + 1U, &net->ipv4.rt_genid);
 }
 
-/*
- * delay < 0  : invalidate cache (fast : entries will be deleted later)
- * delay >= 0 : invalidate & flush cache (can be long)
- */
-void rt_cache_flush(struct net *net, int delay)
+void rt_cache_flush(struct net *net)
 {
 	rt_cache_invalidate(net);
 }
@@ -2345,7 +2341,7 @@ int ip_rt_dump(struct sk_buff *skb,  struct netlink_callback *cb)
 
 void ip_rt_multicast_event(struct in_device *in_dev)
 {
-	rt_cache_flush(dev_net(in_dev->dev), 0);
+	rt_cache_flush(dev_net(in_dev->dev));
 }
 
 #ifdef CONFIG_SYSCTL
@@ -2354,16 +2350,7 @@ static int ipv4_sysctl_rtcache_flush(ctl_table *__ctl, int write,
 					size_t *lenp, loff_t *ppos)
 {
 	if (write) {
-		int flush_delay;
-		ctl_table ctl;
-		struct net *net;
-
-		memcpy(&ctl, __ctl, sizeof(ctl));
-		ctl.data = &flush_delay;
-		proc_dointvec(&ctl, write, buffer, lenp, ppos);
-
-		net = (struct net *)__ctl->extra1;
-		rt_cache_flush(net, flush_delay);
+		rt_cache_flush((struct net *)__ctl->extra1);
 		return 0;
 	}
 
-- 
cgit v1.2.3


From 220a60a425146b0e37998cc0b3082f0541aad866 Mon Sep 17 00:00:00 2001
From: Ben Hutchings <bhutchings@solarflare.com>
Date: Mon, 3 Sep 2012 11:34:58 +0100
Subject: pps/ptp: Allow PHC devices to adjust PPS events for known delay

Initial version by Stuart Hodgson <smhodgson@solarflare.com>

Some PHC device drivers may deliver PPS events with a significant
and variable delay, but still be able to measure precisely what
that delay is.

Add a pps_sub_ts() function for subtracting a delay from the
timestamp(s) in a PPS event, and a PTP event type (PTP_CLOCK_PPSUSR)
for which the caller provides a complete PPS event.

Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
---
 drivers/ptp/ptp_clock.c          |  5 +++++
 include/linux/pps_kernel.h       |  9 +++++++++
 include/linux/ptp_clock_kernel.h | 10 ++++++++--
 3 files changed, 22 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/drivers/ptp/ptp_clock.c b/drivers/ptp/ptp_clock.c
index 1e528b539a07..966875dcda56 100644
--- a/drivers/ptp/ptp_clock.c
+++ b/drivers/ptp/ptp_clock.c
@@ -300,6 +300,11 @@ void ptp_clock_event(struct ptp_clock *ptp, struct ptp_clock_event *event)
 		pps_get_ts(&evt);
 		pps_event(ptp->pps_source, &evt, PTP_PPS_EVENT, NULL);
 		break;
+
+	case PTP_CLOCK_PPSUSR:
+		pps_event(ptp->pps_source, &event->pps_times,
+			  PTP_PPS_EVENT, NULL);
+		break;
 	}
 }
 EXPORT_SYMBOL(ptp_clock_event);
diff --git a/include/linux/pps_kernel.h b/include/linux/pps_kernel.h
index 94048547f29a..0cc45ae1afd5 100644
--- a/include/linux/pps_kernel.h
+++ b/include/linux/pps_kernel.h
@@ -116,5 +116,14 @@ static inline void pps_get_ts(struct pps_event_time *ts)
 
 #endif /* CONFIG_NTP_PPS */
 
+/* Subtract known time delay from PPS event time(s) */
+static inline void pps_sub_ts(struct pps_event_time *ts, struct timespec delta)
+{
+	ts->ts_real = timespec_sub(ts->ts_real, delta);
+#ifdef CONFIG_NTP_PPS
+	ts->ts_raw = timespec_sub(ts->ts_raw, delta);
+#endif
+}
+
 #endif /* LINUX_PPS_KERNEL_H */
 
diff --git a/include/linux/ptp_clock_kernel.h b/include/linux/ptp_clock_kernel.h
index 945704c2ed65..a644b29f1161 100644
--- a/include/linux/ptp_clock_kernel.h
+++ b/include/linux/ptp_clock_kernel.h
@@ -21,6 +21,7 @@
 #ifndef _PTP_CLOCK_KERNEL_H_
 #define _PTP_CLOCK_KERNEL_H_
 
+#include <linux/pps_kernel.h>
 #include <linux/ptp_clock.h>
 
 
@@ -110,6 +111,7 @@ enum ptp_clock_events {
 	PTP_CLOCK_ALARM,
 	PTP_CLOCK_EXTTS,
 	PTP_CLOCK_PPS,
+	PTP_CLOCK_PPSUSR,
 };
 
 /**
@@ -117,13 +119,17 @@ enum ptp_clock_events {
  *
  * @type:  One of the ptp_clock_events enumeration values.
  * @index: Identifies the source of the event.
- * @timestamp: When the event occured.
+ * @timestamp: When the event occurred (%PTP_CLOCK_EXTTS only).
+ * @pps_times: When the event occurred (%PTP_CLOCK_PPSUSR only).
  */
 
 struct ptp_clock_event {
 	int type;
 	int index;
-	u64 timestamp;
+	union {
+		u64 timestamp;
+		struct pps_event_time pps_times;
+	};
 };
 
 /**
-- 
cgit v1.2.3


From 6b536b5e5e1da32f3ba1e3f42c7bf2f80d37dc6b Mon Sep 17 00:00:00 2001
From: Andrei Emeltchenko <andrei.emeltchenko@intel.com>
Date: Fri, 31 Aug 2012 16:39:28 +0300
Subject: Bluetooth: Remove unneeded zero init

hdev is allocated with kzalloc so zero initialization is not needed.

Signed-off-by: Andrei Emeltchenko <andrei.emeltchenko@intel.com>
Acked-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Gustavo Padovan <gustavo.padovan@collabora.co.uk>
---
 include/net/bluetooth/hci_core.h | 9 ---------
 net/bluetooth/hci_core.c         | 2 +-
 2 files changed, 1 insertion(+), 10 deletions(-)

(limited to 'include')

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 1bbc1091748c..fa807a3fd23b 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -428,15 +428,6 @@ static inline bool hci_conn_ssp_enabled(struct hci_conn *conn)
 	       test_bit(HCI_CONN_SSP_ENABLED, &conn->flags);
 }
 
-static inline void hci_conn_hash_init(struct hci_dev *hdev)
-{
-	struct hci_conn_hash *h = &hdev->conn_hash;
-	INIT_LIST_HEAD(&h->list);
-	h->acl_num = 0;
-	h->sco_num = 0;
-	h->le_num = 0;
-}
-
 static inline void hci_conn_hash_add(struct hci_dev *hdev, struct hci_conn *c)
 {
 	struct hci_conn_hash *h = &hdev->conn_hash;
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index fa974a19d365..86abe721f484 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -1652,6 +1652,7 @@ struct hci_dev *hci_alloc_dev(void)
 	INIT_LIST_HEAD(&hdev->link_keys);
 	INIT_LIST_HEAD(&hdev->long_term_keys);
 	INIT_LIST_HEAD(&hdev->remote_oob_data);
+	INIT_LIST_HEAD(&hdev->conn_hash.list);
 
 	INIT_WORK(&hdev->rx_work, hci_rx_work);
 	INIT_WORK(&hdev->cmd_work, hci_cmd_work);
@@ -1674,7 +1675,6 @@ struct hci_dev *hci_alloc_dev(void)
 
 	hci_init_sysfs(hdev);
 	discovery_init(hdev);
-	hci_conn_hash_init(hdev);
 
 	return hdev;
 }
-- 
cgit v1.2.3


From 9472007c62ecc8f21daa2e1e252bf73b67e535fc Mon Sep 17 00:00:00 2001
From: Andrei Emeltchenko <andrei.emeltchenko@intel.com>
Date: Thu, 6 Sep 2012 15:05:43 +0300
Subject: Bluetooth: trivial: Make hci_chan_del return void

Return code is not needed in hci_chan_del

Signed-off-by: Andrei Emeltchenko <andrei.emeltchenko@intel.com>
Signed-off-by: Gustavo Padovan <gustavo.padovan@collabora.co.uk>
---
 include/net/bluetooth/hci_core.h | 2 +-
 net/bluetooth/hci_conn.c         | 4 +---
 2 files changed, 2 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index fa807a3fd23b..4704ca4b8767 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -552,7 +552,7 @@ void hci_conn_hash_flush(struct hci_dev *hdev);
 void hci_conn_check_pending(struct hci_dev *hdev);
 
 struct hci_chan *hci_chan_create(struct hci_conn *conn);
-int hci_chan_del(struct hci_chan *chan);
+void hci_chan_del(struct hci_chan *chan);
 void hci_chan_list_flush(struct hci_conn *conn);
 
 struct hci_conn *hci_connect(struct hci_dev *hdev, int type, bdaddr_t *dst,
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index 3e65c021df50..59f0344406c8 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -935,7 +935,7 @@ struct hci_chan *hci_chan_create(struct hci_conn *conn)
 	return chan;
 }
 
-int hci_chan_del(struct hci_chan *chan)
+void hci_chan_del(struct hci_chan *chan)
 {
 	struct hci_conn *conn = chan->conn;
 	struct hci_dev *hdev = conn->hdev;
@@ -948,8 +948,6 @@ int hci_chan_del(struct hci_chan *chan)
 
 	skb_queue_purge(&chan->data_q);
 	kfree(chan);
-
-	return 0;
 }
 
 void hci_chan_list_flush(struct hci_conn *conn)
-- 
cgit v1.2.3


From 376261ae3627b03574994496f70f95d5538795d5 Mon Sep 17 00:00:00 2001
From: Andrei Emeltchenko <andrei.emeltchenko@intel.com>
Date: Thu, 6 Sep 2012 15:05:45 +0300
Subject: Bluetooth: debug: Print refcnt for hci_dev

Add debug output for HCI kref.

Signed-off-by: Andrei Emeltchenko <andrei.emeltchenko@intel.com>
Signed-off-by: Gustavo Padovan <gustavo.padovan@collabora.co.uk>
---
 include/net/bluetooth/hci_core.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include')

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 4704ca4b8767..6a3337e9c42c 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -603,11 +603,17 @@ static inline void hci_conn_put(struct hci_conn *conn)
 /* ----- HCI Devices ----- */
 static inline void hci_dev_put(struct hci_dev *d)
 {
+	BT_DBG("%s orig refcnt %d", d->name,
+	       atomic_read(&d->dev.kobj.kref.refcount));
+
 	put_device(&d->dev);
 }
 
 static inline struct hci_dev *hci_dev_hold(struct hci_dev *d)
 {
+	BT_DBG("%s orig refcnt %d", d->name,
+	       atomic_read(&d->dev.kobj.kref.refcount));
+
 	get_device(&d->dev);
 	return d;
 }
-- 
cgit v1.2.3


From 9785e10aedfa0fad5c1aac709dce5ada1b123783 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Sat, 8 Sep 2012 02:53:53 +0000
Subject: netlink: kill netlink_set_nonroot

Replace netlink_set_nonroot by one new field `flags' in
struct netlink_kernel_cfg that is passed to netlink_kernel_create.

This patch also renames NL_NONROOT_* to NL_CFG_F_NONROOT_* since
now the flags field in nl_table is generic (so we can add more
flags if needed in the future).

Also adjust all callers in the net-next tree to use these flags
instead of netlink_set_nonroot.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netlink.h    |  9 ++++-----
 lib/kobject_uevent.c       |  2 +-
 net/core/rtnetlink.c       |  2 +-
 net/netlink/af_netlink.c   | 28 +++++++++++++---------------
 net/netlink/genetlink.c    |  3 +--
 security/selinux/netlink.c |  2 +-
 6 files changed, 21 insertions(+), 25 deletions(-)

(limited to 'include')

diff --git a/include/linux/netlink.h b/include/linux/netlink.h
index df73cf4b0290..8719a4e235a5 100644
--- a/include/linux/netlink.h
+++ b/include/linux/netlink.h
@@ -176,12 +176,16 @@ struct netlink_skb_parms {
 extern void netlink_table_grab(void);
 extern void netlink_table_ungrab(void);
 
+#define NL_CFG_F_NONROOT_RECV	(1 << 0)
+#define NL_CFG_F_NONROOT_SEND	(1 << 1)
+
 /* optional Netlink kernel configuration parameters */
 struct netlink_kernel_cfg {
 	unsigned int	groups;
 	void		(*input)(struct sk_buff *skb);
 	struct mutex	*cb_mutex;
 	void		(*bind)(int group);
+	unsigned int	flags;
 };
 
 extern struct sock *netlink_kernel_create(struct net *net, int unit,
@@ -260,11 +264,6 @@ extern int netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
 			      const struct nlmsghdr *nlh,
 			      struct netlink_dump_control *control);
 
-
-#define NL_NONROOT_RECV 0x1
-#define NL_NONROOT_SEND 0x2
-extern void netlink_set_nonroot(int protocol, unsigned flag);
-
 #endif /* __KERNEL__ */
 
 #endif	/* __LINUX_NETLINK_H */
diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c
index 0401d2916d9f..c2e97787d01e 100644
--- a/lib/kobject_uevent.c
+++ b/lib/kobject_uevent.c
@@ -375,6 +375,7 @@ static int uevent_net_init(struct net *net)
 	struct uevent_sock *ue_sk;
 	struct netlink_kernel_cfg cfg = {
 		.groups	= 1,
+		.flags	= NL_CFG_F_NONROOT_RECV,
 	};
 
 	ue_sk = kzalloc(sizeof(*ue_sk), GFP_KERNEL);
@@ -422,7 +423,6 @@ static struct pernet_operations uevent_net_ops = {
 
 static int __init kobject_uevent_init(void)
 {
-	netlink_set_nonroot(NETLINK_KOBJECT_UEVENT, NL_NONROOT_RECV);
 	return register_pernet_subsys(&uevent_net_ops);
 }
 
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index c64efcff8078..a71806eb9cc6 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -2381,6 +2381,7 @@ static int __net_init rtnetlink_net_init(struct net *net)
 		.groups		= RTNLGRP_MAX,
 		.input		= rtnetlink_rcv,
 		.cb_mutex	= &rtnl_mutex,
+		.flags		= NL_CFG_F_NONROOT_RECV,
 	};
 
 	sk = netlink_kernel_create(net, NETLINK_ROUTE, THIS_MODULE, &cfg);
@@ -2416,7 +2417,6 @@ void __init rtnetlink_init(void)
 	if (register_pernet_subsys(&rtnetlink_net_ops))
 		panic("rtnetlink_init: cannot initialize rtnetlink\n");
 
-	netlink_set_nonroot(NETLINK_ROUTE, NL_NONROOT_RECV);
 	register_netdevice_notifier(&rtnetlink_dev_notifier);
 
 	rtnl_register(PF_UNSPEC, RTM_GETLINK, rtnl_getlink,
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index f530b1ca1773..b74540ce3c14 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -121,7 +121,7 @@ struct netlink_table {
 	struct nl_pid_hash	hash;
 	struct hlist_head	mc_list;
 	struct listeners __rcu	*listeners;
-	unsigned int		nl_nonroot;
+	unsigned int		flags;
 	unsigned int		groups;
 	struct mutex		*cb_mutex;
 	struct module		*module;
@@ -536,6 +536,8 @@ static int netlink_release(struct socket *sock)
 		if (--nl_table[sk->sk_protocol].registered == 0) {
 			kfree(nl_table[sk->sk_protocol].listeners);
 			nl_table[sk->sk_protocol].module = NULL;
+			nl_table[sk->sk_protocol].bind = NULL;
+			nl_table[sk->sk_protocol].flags = 0;
 			nl_table[sk->sk_protocol].registered = 0;
 		}
 	} else if (nlk->subscriptions) {
@@ -596,7 +598,7 @@ retry:
 
 static inline int netlink_capable(const struct socket *sock, unsigned int flag)
 {
-	return (nl_table[sock->sk->sk_protocol].nl_nonroot & flag) ||
+	return (nl_table[sock->sk->sk_protocol].flags & flag) ||
 	       capable(CAP_NET_ADMIN);
 }
 
@@ -659,7 +661,7 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr,
 
 	/* Only superuser is allowed to listen multicasts */
 	if (nladdr->nl_groups) {
-		if (!netlink_capable(sock, NL_NONROOT_RECV))
+		if (!netlink_capable(sock, NL_CFG_F_NONROOT_RECV))
 			return -EPERM;
 		err = netlink_realloc_groups(sk);
 		if (err)
@@ -721,7 +723,7 @@ static int netlink_connect(struct socket *sock, struct sockaddr *addr,
 		return -EINVAL;
 
 	/* Only superuser is allowed to send multicasts */
-	if (nladdr->nl_groups && !netlink_capable(sock, NL_NONROOT_SEND))
+	if (nladdr->nl_groups && !netlink_capable(sock, NL_CFG_F_NONROOT_SEND))
 		return -EPERM;
 
 	if (!nlk->pid)
@@ -1244,7 +1246,7 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname,
 		break;
 	case NETLINK_ADD_MEMBERSHIP:
 	case NETLINK_DROP_MEMBERSHIP: {
-		if (!netlink_capable(sock, NL_NONROOT_RECV))
+		if (!netlink_capable(sock, NL_CFG_F_NONROOT_RECV))
 			return -EPERM;
 		err = netlink_realloc_groups(sk);
 		if (err)
@@ -1376,7 +1378,7 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock,
 		dst_group = ffs(addr->nl_groups);
 		err =  -EPERM;
 		if ((dst_group || dst_pid) &&
-		    !netlink_capable(sock, NL_NONROOT_SEND))
+		    !netlink_capable(sock, NL_CFG_F_NONROOT_SEND))
 			goto out;
 	} else {
 		dst_pid = nlk->dst_pid;
@@ -1580,7 +1582,10 @@ netlink_kernel_create(struct net *net, int unit,
 		rcu_assign_pointer(nl_table[unit].listeners, listeners);
 		nl_table[unit].cb_mutex = cb_mutex;
 		nl_table[unit].module = module;
-		nl_table[unit].bind = cfg ? cfg->bind : NULL;
+		if (cfg) {
+			nl_table[unit].bind = cfg->bind;
+			nl_table[unit].flags = cfg->flags;
+		}
 		nl_table[unit].registered = 1;
 	} else {
 		kfree(listeners);
@@ -1679,13 +1684,6 @@ void netlink_clear_multicast_users(struct sock *ksk, unsigned int group)
 	netlink_table_ungrab();
 }
 
-void netlink_set_nonroot(int protocol, unsigned int flags)
-{
-	if ((unsigned int)protocol < MAX_LINKS)
-		nl_table[protocol].nl_nonroot = flags;
-}
-EXPORT_SYMBOL(netlink_set_nonroot);
-
 struct nlmsghdr *
 __nlmsg_put(struct sk_buff *skb, u32 pid, u32 seq, int type, int len, int flags)
 {
@@ -2150,7 +2148,7 @@ static void __init netlink_add_usersock_entry(void)
 	rcu_assign_pointer(nl_table[NETLINK_USERSOCK].listeners, listeners);
 	nl_table[NETLINK_USERSOCK].module = THIS_MODULE;
 	nl_table[NETLINK_USERSOCK].registered = 1;
-	nl_table[NETLINK_USERSOCK].nl_nonroot = NL_NONROOT_SEND;
+	nl_table[NETLINK_USERSOCK].flags = NL_CFG_F_NONROOT_SEND;
 
 	netlink_table_ungrab();
 }
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index fda497412fc3..c1b71aef9f71 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -918,6 +918,7 @@ static int __net_init genl_pernet_init(struct net *net)
 	struct netlink_kernel_cfg cfg = {
 		.input		= genl_rcv,
 		.cb_mutex	= &genl_mutex,
+		.flags		= NL_CFG_F_NONROOT_RECV,
 	};
 
 	/* we'll bump the group number right afterwards */
@@ -955,8 +956,6 @@ static int __init genl_init(void)
 	if (err < 0)
 		goto problem;
 
-	netlink_set_nonroot(NETLINK_GENERIC, NL_NONROOT_RECV);
-
 	err = register_pernet_subsys(&genl_pernet_ops);
 	if (err)
 		goto problem;
diff --git a/security/selinux/netlink.c b/security/selinux/netlink.c
index 8a77725423e0..0d2cd11f3c22 100644
--- a/security/selinux/netlink.c
+++ b/security/selinux/netlink.c
@@ -113,13 +113,13 @@ static int __init selnl_init(void)
 {
 	struct netlink_kernel_cfg cfg = {
 		.groups	= SELNLGRP_MAX,
+		.flags	= NL_CFG_F_NONROOT_RECV,
 	};
 
 	selnl = netlink_kernel_create(&init_net, NETLINK_SELINUX,
 				      THIS_MODULE, &cfg);
 	if (selnl == NULL)
 		panic("SELinux:  Cannot create netlink socket.");
-	netlink_set_nonroot(NETLINK_SELINUX, NL_NONROOT_RECV);
 	return 0;
 }
 
-- 
cgit v1.2.3


From 9f00d9776bc5beb92e8bfc884a7e96ddc5589e2e Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Sat, 8 Sep 2012 02:53:54 +0000
Subject: netlink: hide struct module parameter in netlink_kernel_create

This patch defines netlink_kernel_create as a wrapper function of
__netlink_kernel_create to hide the struct module *me parameter
(which seems to be THIS_MODULE in all existing netlink subsystems).

Suggested by David S. Miller.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 crypto/crypto_user.c                |  3 +--
 drivers/connector/connector.c       |  3 +--
 drivers/infiniband/core/netlink.c   |  2 +-
 drivers/scsi/scsi_netlink.c         |  2 +-
 drivers/scsi/scsi_transport_iscsi.c |  3 +--
 drivers/staging/gdm72xx/netlink_k.c |  2 +-
 include/linux/netlink.h             | 13 ++++++++++---
 kernel/audit.c                      |  3 +--
 lib/kobject_uevent.c                |  3 +--
 net/bridge/netfilter/ebt_ulog.c     |  3 +--
 net/core/rtnetlink.c                |  2 +-
 net/core/sock_diag.c                |  3 +--
 net/decnet/netfilter/dn_rtmsg.c     |  3 +--
 net/ipv4/fib_frontend.c             |  2 +-
 net/ipv4/netfilter/ipt_ULOG.c       |  3 +--
 net/netfilter/nfnetlink.c           |  2 +-
 net/netlink/af_netlink.c            |  8 +++-----
 net/netlink/genetlink.c             |  3 +--
 net/xfrm/xfrm_user.c                |  2 +-
 security/selinux/netlink.c          |  3 +--
 20 files changed, 31 insertions(+), 37 deletions(-)

(limited to 'include')

diff --git a/crypto/crypto_user.c b/crypto/crypto_user.c
index ba2c611154af..165914e63ef2 100644
--- a/crypto/crypto_user.c
+++ b/crypto/crypto_user.c
@@ -500,8 +500,7 @@ static int __init crypto_user_init(void)
 		.input	= crypto_netlink_rcv,
 	};
 
-	crypto_nlsk = netlink_kernel_create(&init_net, NETLINK_CRYPTO,
-					    THIS_MODULE, &cfg);
+	crypto_nlsk = netlink_kernel_create(&init_net, NETLINK_CRYPTO, &cfg);
 	if (!crypto_nlsk)
 		return -ENOMEM;
 
diff --git a/drivers/connector/connector.c b/drivers/connector/connector.c
index 82fa4f0f91d6..965b7811e04f 100644
--- a/drivers/connector/connector.c
+++ b/drivers/connector/connector.c
@@ -264,8 +264,7 @@ static int __devinit cn_init(void)
 		.input	= dev->input,
 	};
 
-	dev->nls = netlink_kernel_create(&init_net, NETLINK_CONNECTOR,
-					 THIS_MODULE, &cfg);
+	dev->nls = netlink_kernel_create(&init_net, NETLINK_CONNECTOR, &cfg);
 	if (!dev->nls)
 		return -EIO;
 
diff --git a/drivers/infiniband/core/netlink.c b/drivers/infiniband/core/netlink.c
index 3ae2bfd31015..fe10a949aef9 100644
--- a/drivers/infiniband/core/netlink.c
+++ b/drivers/infiniband/core/netlink.c
@@ -177,7 +177,7 @@ int __init ibnl_init(void)
 		.input	= ibnl_rcv,
 	};
 
-	nls = netlink_kernel_create(&init_net, NETLINK_RDMA, THIS_MODULE, &cfg);
+	nls = netlink_kernel_create(&init_net, NETLINK_RDMA, &cfg);
 	if (!nls) {
 		pr_warn("Failed to create netlink socket\n");
 		return -ENOMEM;
diff --git a/drivers/scsi/scsi_netlink.c b/drivers/scsi/scsi_netlink.c
index 8818dd681c19..3252bc9625ee 100644
--- a/drivers/scsi/scsi_netlink.c
+++ b/drivers/scsi/scsi_netlink.c
@@ -501,7 +501,7 @@ scsi_netlink_init(void)
 	}
 
 	scsi_nl_sock = netlink_kernel_create(&init_net, NETLINK_SCSITRANSPORT,
-					     THIS_MODULE, &cfg);
+					     &cfg);
 	if (!scsi_nl_sock) {
 		printk(KERN_ERR "%s: register of receive handler failed\n",
 				__func__);
diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c
index fa1dfaa83e32..519bd5303f3f 100644
--- a/drivers/scsi/scsi_transport_iscsi.c
+++ b/drivers/scsi/scsi_transport_iscsi.c
@@ -2969,8 +2969,7 @@ static __init int iscsi_transport_init(void)
 	if (err)
 		goto unregister_conn_class;
 
-	nls = netlink_kernel_create(&init_net, NETLINK_ISCSI,
-				    THIS_MODULE, &cfg);
+	nls = netlink_kernel_create(&init_net, NETLINK_ISCSI, &cfg);
 	if (!nls) {
 		err = -ENOBUFS;
 		goto unregister_session_class;
diff --git a/drivers/staging/gdm72xx/netlink_k.c b/drivers/staging/gdm72xx/netlink_k.c
index 3abb31df8f28..2109cab0a14c 100644
--- a/drivers/staging/gdm72xx/netlink_k.c
+++ b/drivers/staging/gdm72xx/netlink_k.c
@@ -95,7 +95,7 @@ struct sock *netlink_init(int unit, void (*cb)(struct net_device *dev, u16 type,
 	init_MUTEX(&netlink_mutex);
 #endif
 
-	sock = netlink_kernel_create(&init_net, unit, THIS_MODULE, &cfg);
+	sock = netlink_kernel_create(&init_net, unit, &cfg);
 
 	if (sock)
 		rcv_cb = cb;
diff --git a/include/linux/netlink.h b/include/linux/netlink.h
index 8719a4e235a5..cd17dda5a987 100644
--- a/include/linux/netlink.h
+++ b/include/linux/netlink.h
@@ -153,6 +153,7 @@ struct nlattr {
 
 #include <linux/capability.h>
 #include <linux/skbuff.h>
+#include <linux/module.h>
 #include <net/scm.h>
 
 struct net;
@@ -188,9 +189,15 @@ struct netlink_kernel_cfg {
 	unsigned int	flags;
 };
 
-extern struct sock *netlink_kernel_create(struct net *net, int unit,
-					  struct module *module,
-					  struct netlink_kernel_cfg *cfg);
+extern struct sock *__netlink_kernel_create(struct net *net, int unit,
+					    struct module *module,
+					    struct netlink_kernel_cfg *cfg);
+static inline struct sock *
+netlink_kernel_create(struct net *net, int unit, struct netlink_kernel_cfg *cfg)
+{
+	return __netlink_kernel_create(net, unit, THIS_MODULE, cfg);
+}
+
 extern void netlink_kernel_release(struct sock *sk);
 extern int __netlink_change_ngroups(struct sock *sk, unsigned int groups);
 extern int netlink_change_ngroups(struct sock *sk, unsigned int groups);
diff --git a/kernel/audit.c b/kernel/audit.c
index ea3b7b6191c7..a24aafa850ae 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -971,8 +971,7 @@ static int __init audit_init(void)
 
 	printk(KERN_INFO "audit: initializing netlink socket (%s)\n",
 	       audit_default ? "enabled" : "disabled");
-	audit_sock = netlink_kernel_create(&init_net, NETLINK_AUDIT,
-					   THIS_MODULE, &cfg);
+	audit_sock = netlink_kernel_create(&init_net, NETLINK_AUDIT, &cfg);
 	if (!audit_sock)
 		audit_panic("cannot initialize netlink socket");
 	else
diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c
index c2e97787d01e..52e5abbc41db 100644
--- a/lib/kobject_uevent.c
+++ b/lib/kobject_uevent.c
@@ -382,8 +382,7 @@ static int uevent_net_init(struct net *net)
 	if (!ue_sk)
 		return -ENOMEM;
 
-	ue_sk->sk = netlink_kernel_create(net, NETLINK_KOBJECT_UEVENT,
-					  THIS_MODULE, &cfg);
+	ue_sk->sk = netlink_kernel_create(net, NETLINK_KOBJECT_UEVENT, &cfg);
 	if (!ue_sk->sk) {
 		printk(KERN_ERR
 		       "kobject_uevent: unable to create netlink socket!\n");
diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c
index 19063473c71f..3476ec469740 100644
--- a/net/bridge/netfilter/ebt_ulog.c
+++ b/net/bridge/netfilter/ebt_ulog.c
@@ -298,8 +298,7 @@ static int __init ebt_ulog_init(void)
 		spin_lock_init(&ulog_buffers[i].lock);
 	}
 
-	ebtulognl = netlink_kernel_create(&init_net, NETLINK_NFLOG,
-					  THIS_MODULE, &cfg);
+	ebtulognl = netlink_kernel_create(&init_net, NETLINK_NFLOG, &cfg);
 	if (!ebtulognl)
 		ret = -ENOMEM;
 	else if ((ret = xt_register_target(&ebt_ulog_tg_reg)) != 0)
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index a71806eb9cc6..508c5df4a09c 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -2384,7 +2384,7 @@ static int __net_init rtnetlink_net_init(struct net *net)
 		.flags		= NL_CFG_F_NONROOT_RECV,
 	};
 
-	sk = netlink_kernel_create(net, NETLINK_ROUTE, THIS_MODULE, &cfg);
+	sk = netlink_kernel_create(net, NETLINK_ROUTE, &cfg);
 	if (!sk)
 		return -ENOMEM;
 	net->rtnl = sk;
diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c
index 9d8755e4a7a5..602cd637182e 100644
--- a/net/core/sock_diag.c
+++ b/net/core/sock_diag.c
@@ -172,8 +172,7 @@ static int __net_init diag_net_init(struct net *net)
 		.input	= sock_diag_rcv,
 	};
 
-	net->diag_nlsk = netlink_kernel_create(net, NETLINK_SOCK_DIAG,
-					       THIS_MODULE, &cfg);
+	net->diag_nlsk = netlink_kernel_create(net, NETLINK_SOCK_DIAG, &cfg);
 	return net->diag_nlsk == NULL ? -ENOMEM : 0;
 }
 
diff --git a/net/decnet/netfilter/dn_rtmsg.c b/net/decnet/netfilter/dn_rtmsg.c
index 11db0ecf342f..dfe42012a044 100644
--- a/net/decnet/netfilter/dn_rtmsg.c
+++ b/net/decnet/netfilter/dn_rtmsg.c
@@ -130,8 +130,7 @@ static int __init dn_rtmsg_init(void)
 		.input	= dnrmg_receive_user_skb,
 	};
 
-	dnrmg = netlink_kernel_create(&init_net,
-				      NETLINK_DNRTMSG, THIS_MODULE, &cfg);
+	dnrmg = netlink_kernel_create(&init_net, NETLINK_DNRTMSG, &cfg);
 	if (dnrmg == NULL) {
 		printk(KERN_ERR "dn_rtmsg: Cannot create netlink socket");
 		return -ENOMEM;
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 8a7cd795a96e..dc1f10ed1872 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -986,7 +986,7 @@ static int __net_init nl_fib_lookup_init(struct net *net)
 		.input	= nl_fib_input,
 	};
 
-	sk = netlink_kernel_create(net, NETLINK_FIB_LOOKUP, THIS_MODULE, &cfg);
+	sk = netlink_kernel_create(net, NETLINK_FIB_LOOKUP, &cfg);
 	if (sk == NULL)
 		return -EAFNOSUPPORT;
 	net->ipv4.fibnl = sk;
diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c
index 1109f7f6c254..b5ef3cba2250 100644
--- a/net/ipv4/netfilter/ipt_ULOG.c
+++ b/net/ipv4/netfilter/ipt_ULOG.c
@@ -396,8 +396,7 @@ static int __init ulog_tg_init(void)
 	for (i = 0; i < ULOG_MAXNLGROUPS; i++)
 		setup_timer(&ulog_buffers[i].timer, ulog_timer, i);
 
-	nflognl = netlink_kernel_create(&init_net, NETLINK_NFLOG,
-					THIS_MODULE, &cfg);
+	nflognl = netlink_kernel_create(&init_net, NETLINK_NFLOG, &cfg);
 	if (!nflognl)
 		return -ENOMEM;
 
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index a26503342e71..ffb92c03a358 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -241,7 +241,7 @@ static int __net_init nfnetlink_net_init(struct net *net)
 #endif
 	};
 
-	nfnl = netlink_kernel_create(net, NETLINK_NETFILTER, THIS_MODULE, &cfg);
+	nfnl = netlink_kernel_create(net, NETLINK_NETFILTER, &cfg);
 	if (!nfnl)
 		return -ENOMEM;
 	net->nfnl_stash = nfnl;
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index b74540ce3c14..4d348e97e131 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -1526,9 +1526,8 @@ static void netlink_data_ready(struct sock *sk, int len)
  */
 
 struct sock *
-netlink_kernel_create(struct net *net, int unit,
-		      struct module *module,
-		      struct netlink_kernel_cfg *cfg)
+__netlink_kernel_create(struct net *net, int unit, struct module *module,
+			struct netlink_kernel_cfg *cfg)
 {
 	struct socket *sock;
 	struct sock *sk;
@@ -1603,8 +1602,7 @@ out_sock_release_nosk:
 	sock_release(sock);
 	return NULL;
 }
-EXPORT_SYMBOL(netlink_kernel_create);
-
+EXPORT_SYMBOL(__netlink_kernel_create);
 
 void
 netlink_kernel_release(struct sock *sk)
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index c1b71aef9f71..19288b7d6135 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -922,8 +922,7 @@ static int __net_init genl_pernet_init(struct net *net)
 	};
 
 	/* we'll bump the group number right afterwards */
-	net->genl_sock = netlink_kernel_create(net, NETLINK_GENERIC,
-					       THIS_MODULE, &cfg);
+	net->genl_sock = netlink_kernel_create(net, NETLINK_GENERIC, &cfg);
 
 	if (!net->genl_sock && net_eq(net, &init_net))
 		panic("GENL: Cannot initialize generic netlink\n");
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index ab58034c42d6..354070adb5ef 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -2963,7 +2963,7 @@ static int __net_init xfrm_user_net_init(struct net *net)
 		.input	= xfrm_netlink_rcv,
 	};
 
-	nlsk = netlink_kernel_create(net, NETLINK_XFRM, THIS_MODULE, &cfg);
+	nlsk = netlink_kernel_create(net, NETLINK_XFRM, &cfg);
 	if (nlsk == NULL)
 		return -ENOMEM;
 	net->xfrm.nlsk_stash = nlsk; /* Don't set to NULL */
diff --git a/security/selinux/netlink.c b/security/selinux/netlink.c
index 0d2cd11f3c22..14d810ead420 100644
--- a/security/selinux/netlink.c
+++ b/security/selinux/netlink.c
@@ -116,8 +116,7 @@ static int __init selnl_init(void)
 		.flags	= NL_CFG_F_NONROOT_RECV,
 	};
 
-	selnl = netlink_kernel_create(&init_net, NETLINK_SELINUX,
-				      THIS_MODULE, &cfg);
+	selnl = netlink_kernel_create(&init_net, NETLINK_SELINUX, &cfg);
 	if (selnl == NULL)
 		panic("SELinux:  Cannot create netlink socket.");
 	return 0;
-- 
cgit v1.2.3


From e548c49e6dc6b08b59042930a2e90c69c13c9293 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Tue, 4 Sep 2012 17:08:23 +0200
Subject: mac80211: add key flag for management keys

Mark keys that might be used to receive management
frames so drivers can fall back on software crypto
for them if they don't support hardware offload.
As the new flag is only set correctly for RX keys
and the existing IEEE80211_KEY_FLAG_SW_MGMT flag
can only affect TX, also rename the latter to
IEEE80211_KEY_FLAG_SW_MGMT_TX.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 drivers/net/wireless/ath/ath5k/mac80211-ops.c |  2 +-
 drivers/net/wireless/ath/ath9k/htc_drv_main.c |  2 +-
 drivers/net/wireless/ath/ath9k/main.c         |  2 +-
 include/net/mac80211.h                        | 12 ++++++++--
 net/mac80211/cfg.c                            | 32 +++++++++++++++++++++++++++
 net/mac80211/tx.c                             |  2 +-
 6 files changed, 46 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/drivers/net/wireless/ath/ath5k/mac80211-ops.c b/drivers/net/wireless/ath/ath5k/mac80211-ops.c
index c89fa6ead615..155c70e4c26c 100644
--- a/drivers/net/wireless/ath/ath5k/mac80211-ops.c
+++ b/drivers/net/wireless/ath/ath5k/mac80211-ops.c
@@ -524,7 +524,7 @@ ath5k_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd,
 			if (key->cipher == WLAN_CIPHER_SUITE_TKIP)
 				key->flags |= IEEE80211_KEY_FLAG_GENERATE_MMIC;
 			if (key->cipher == WLAN_CIPHER_SUITE_CCMP)
-				key->flags |= IEEE80211_KEY_FLAG_SW_MGMT;
+				key->flags |= IEEE80211_KEY_FLAG_SW_MGMT_TX;
 			ret = 0;
 		}
 		break;
diff --git a/drivers/net/wireless/ath/ath9k/htc_drv_main.c b/drivers/net/wireless/ath/ath9k/htc_drv_main.c
index c32f6e3ffb18..a6bb6e3698ca 100644
--- a/drivers/net/wireless/ath/ath9k/htc_drv_main.c
+++ b/drivers/net/wireless/ath/ath9k/htc_drv_main.c
@@ -1449,7 +1449,7 @@ static int ath9k_htc_set_key(struct ieee80211_hw *hw,
 				key->flags |= IEEE80211_KEY_FLAG_GENERATE_MMIC;
 			if (priv->ah->sw_mgmt_crypto &&
 			    key->cipher == WLAN_CIPHER_SUITE_CCMP)
-				key->flags |= IEEE80211_KEY_FLAG_SW_MGMT;
+				key->flags |= IEEE80211_KEY_FLAG_SW_MGMT_TX;
 			ret = 0;
 		}
 		break;
diff --git a/drivers/net/wireless/ath/ath9k/main.c b/drivers/net/wireless/ath/ath9k/main.c
index 4d8dc9ff5a75..06c628e85a43 100644
--- a/drivers/net/wireless/ath/ath9k/main.c
+++ b/drivers/net/wireless/ath/ath9k/main.c
@@ -1441,7 +1441,7 @@ static int ath9k_set_key(struct ieee80211_hw *hw,
 				key->flags |= IEEE80211_KEY_FLAG_GENERATE_MMIC;
 			if (sc->sc_ah->sw_mgmt_crypto &&
 			    key->cipher == WLAN_CIPHER_SUITE_CCMP)
-				key->flags |= IEEE80211_KEY_FLAG_SW_MGMT;
+				key->flags |= IEEE80211_KEY_FLAG_SW_MGMT_TX;
 			ret = 0;
 		}
 		break;
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 71f8262fc1df..82558c8decf8 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -973,21 +973,29 @@ static inline bool ieee80211_vif_is_mesh(struct ieee80211_vif *vif)
  *	generation in software.
  * @IEEE80211_KEY_FLAG_PAIRWISE: Set by mac80211, this flag indicates
  *	that the key is pairwise rather then a shared key.
- * @IEEE80211_KEY_FLAG_SW_MGMT: This flag should be set by the driver for a
+ * @IEEE80211_KEY_FLAG_SW_MGMT_TX: This flag should be set by the driver for a
  *	CCMP key if it requires CCMP encryption of management frames (MFP) to
  *	be done in software.
  * @IEEE80211_KEY_FLAG_PUT_IV_SPACE: This flag should be set by the driver
  *	if space should be prepared for the IV, but the IV
  *	itself should not be generated. Do not set together with
  *	@IEEE80211_KEY_FLAG_GENERATE_IV on the same key.
+ * @IEEE80211_KEY_FLAG_RX_MGMT: This key will be used to decrypt received
+ *	management frames. The flag can help drivers that have a hardware
+ *	crypto implementation that doesn't deal with management frames
+ *	properly by allowing them to not upload the keys to hardware and
+ *	fall back to software crypto. Note that this flag deals only with
+ *	RX, if your crypto engine can't deal with TX you can also set the
+ *	%IEEE80211_KEY_FLAG_SW_MGMT_TX flag to encrypt such frames in SW.
  */
 enum ieee80211_key_flags {
 	IEEE80211_KEY_FLAG_WMM_STA	= 1<<0,
 	IEEE80211_KEY_FLAG_GENERATE_IV	= 1<<1,
 	IEEE80211_KEY_FLAG_GENERATE_MMIC= 1<<2,
 	IEEE80211_KEY_FLAG_PAIRWISE	= 1<<3,
-	IEEE80211_KEY_FLAG_SW_MGMT	= 1<<4,
+	IEEE80211_KEY_FLAG_SW_MGMT_TX	= 1<<4,
 	IEEE80211_KEY_FLAG_PUT_IV_SPACE = 1<<5,
+	IEEE80211_KEY_FLAG_RX_MGMT	= 1<<6,
 };
 
 /**
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 03fe6d1cff42..00e31b488adc 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -170,6 +170,38 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev,
 		}
 	}
 
+	switch (sdata->vif.type) {
+	case NL80211_IFTYPE_STATION:
+		if (sdata->u.mgd.mfp != IEEE80211_MFP_DISABLED)
+			key->conf.flags |= IEEE80211_KEY_FLAG_RX_MGMT;
+		break;
+	case NL80211_IFTYPE_AP:
+	case NL80211_IFTYPE_AP_VLAN:
+		/* Keys without a station are used for TX only */
+		if (key->sta && test_sta_flag(key->sta, WLAN_STA_MFP))
+			key->conf.flags |= IEEE80211_KEY_FLAG_RX_MGMT;
+		break;
+	case NL80211_IFTYPE_ADHOC:
+		/* no MFP (yet) */
+		break;
+	case NL80211_IFTYPE_MESH_POINT:
+#ifdef CONFIG_MAC80211_MESH
+		if (sdata->u.mesh.security != IEEE80211_MESH_SEC_NONE)
+			key->conf.flags |= IEEE80211_KEY_FLAG_RX_MGMT;
+		break;
+#endif
+	case NL80211_IFTYPE_WDS:
+	case NL80211_IFTYPE_MONITOR:
+	case NL80211_IFTYPE_P2P_DEVICE:
+	case NL80211_IFTYPE_UNSPECIFIED:
+	case NUM_NL80211_IFTYPES:
+	case NL80211_IFTYPE_P2P_CLIENT:
+	case NL80211_IFTYPE_P2P_GO:
+		/* shouldn't happen */
+		WARN_ON_ONCE(1);
+		break;
+	}
+
 	err = ieee80211_key_link(key, sdata, sta);
 	if (err)
 		ieee80211_key_free(sdata->local, key);
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 29eb4e678235..e0e0d1d0e830 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -580,7 +580,7 @@ ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx)
 				tx->key = NULL;
 			else
 				skip_hw = (tx->key->conf.flags &
-					   IEEE80211_KEY_FLAG_SW_MGMT) &&
+					   IEEE80211_KEY_FLAG_SW_MGMT_TX) &&
 					ieee80211_is_mgmt(hdr->frame_control);
 			break;
 		case WLAN_CIPHER_SUITE_AES_CMAC:
-- 
cgit v1.2.3


From 15e473046cb6e5d18a4d0057e61d76315230382b Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Fri, 7 Sep 2012 20:12:54 +0000
Subject: netlink: Rename pid to portid to avoid confusion

It is a frequent mistake to confuse the netlink port identifier with a
process identifier.  Try to reduce this confusion by renaming fields
that hold port identifiers portid instead of pid.

I have carefully avoided changing the structures exported to
userspace to avoid changing the userspace API.

I have successfully built an allyesconfig kernel with this change.

Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
Acked-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 crypto/crypto_user.c                        |   4 +-
 drivers/net/team/team.c                     |  38 +++---
 drivers/net/wireless/mac80211_hwsim.c       |  46 ++++----
 drivers/scsi/scsi_transport_iscsi.c         |   4 +-
 drivers/staging/gdm72xx/netlink_k.c         |   2 +-
 fs/dlm/netlink.c                            |   8 +-
 include/linux/netlink.h                     |  14 +--
 include/net/cfg80211.h                      |   2 +-
 include/net/genetlink.h                     |  34 +++---
 include/net/netfilter/nf_conntrack_ecache.h |  32 +++---
 include/net/netlink.h                       |  26 ++---
 include/net/nfc/nfc.h                       |   2 +-
 include/net/xfrm.h                          |   6 +-
 kernel/audit.c                              |  20 ++--
 kernel/taskstats.c                          |   4 +-
 net/bridge/br_fdb.c                         |   6 +-
 net/bridge/br_netlink.c                     |   2 +-
 net/can/gw.c                                |   2 +-
 net/core/fib_rules.c                        |   6 +-
 net/core/neighbour.c                        |   8 +-
 net/core/rtnetlink.c                        |  12 +-
 net/dcb/dcbnl.c                             |  18 +--
 net/decnet/dn_dev.c                         |   6 +-
 net/decnet/dn_route.c                       |  10 +-
 net/decnet/dn_table.c                       |  12 +-
 net/ieee802154/nl-mac.c                     |   6 +-
 net/ieee802154/nl-phy.c                     |   6 +-
 net/ipv4/devinet.c                          |  28 ++---
 net/ipv4/fib_frontend.c                     |  10 +-
 net/ipv4/fib_semantics.c                    |   8 +-
 net/ipv4/fib_trie.c                         |   2 +-
 net/ipv4/inet_diag.c                        |  32 +++---
 net/ipv4/ipmr.c                             |  10 +-
 net/ipv4/route.c                            |   8 +-
 net/ipv4/tcp_metrics.c                      |   2 +-
 net/ipv4/udp_diag.c                         |   6 +-
 net/ipv6/addrconf.c                         |  32 +++---
 net/ipv6/addrlabel.c                        |  10 +-
 net/ipv6/ip6mr.c                            |  10 +-
 net/ipv6/route.c                            |  20 ++--
 net/irda/irnetlink.c                        |   2 +-
 net/key/af_key.c                            |  32 +++---
 net/l2tp/l2tp_netlink.c                     |  24 ++--
 net/netfilter/ipset/ip_set_core.c           |  24 ++--
 net/netfilter/ipvs/ip_vs_ctl.c              |   6 +-
 net/netfilter/nf_conntrack_ecache.c         |   2 +-
 net/netfilter/nf_conntrack_netlink.c        |  78 ++++++-------
 net/netfilter/nfnetlink_acct.c              |  12 +-
 net/netfilter/nfnetlink_cthelper.c          |  12 +-
 net/netfilter/nfnetlink_cttimeout.c         |  12 +-
 net/netfilter/nfnetlink_log.c               |  18 +--
 net/netfilter/nfnetlink_queue_core.c        |  28 ++---
 net/netlabel/netlabel_cipso_v4.c            |   2 +-
 net/netlabel/netlabel_mgmt.c                |   4 +-
 net/netlabel/netlabel_unlabeled.c           |   2 +-
 net/netlink/af_netlink.c                    | 172 ++++++++++++++--------------
 net/netlink/genetlink.c                     |  42 +++----
 net/nfc/netlink.c                           |  26 ++---
 net/openvswitch/actions.c                   |   4 +-
 net/openvswitch/datapath.c                  |  84 +++++++-------
 net/openvswitch/datapath.h                  |   2 +-
 net/openvswitch/vport.c                     |   2 +-
 net/openvswitch/vport.h                     |   6 +-
 net/packet/diag.c                           |   6 +-
 net/phonet/pn_netlink.c                     |  14 +--
 net/sched/act_api.c                         |  52 ++++-----
 net/sched/cls_api.c                         |  14 +--
 net/sched/sch_api.c                         |  44 +++----
 net/tipc/netlink.c                          |   2 +-
 net/unix/diag.c                             |  14 +--
 net/wireless/core.h                         |   2 +-
 net/wireless/mlme.c                         |  16 +--
 net/wireless/nl80211.c                      | 110 +++++++++---------
 net/xfrm/xfrm_state.c                       |  10 +-
 net/xfrm/xfrm_user.c                        |  62 +++++-----
 75 files changed, 728 insertions(+), 728 deletions(-)

(limited to 'include')

diff --git a/crypto/crypto_user.c b/crypto/crypto_user.c
index 165914e63ef2..6bba414d0c61 100644
--- a/crypto/crypto_user.c
+++ b/crypto/crypto_user.c
@@ -166,7 +166,7 @@ static int crypto_report_alg(struct crypto_alg *alg,
 	struct crypto_user_alg *ualg;
 	int err = 0;
 
-	nlh = nlmsg_put(skb, NETLINK_CB(in_skb).pid, info->nlmsg_seq,
+	nlh = nlmsg_put(skb, NETLINK_CB(in_skb).portid, info->nlmsg_seq,
 			CRYPTO_MSG_GETALG, sizeof(*ualg), info->nlmsg_flags);
 	if (!nlh) {
 		err = -EMSGSIZE;
@@ -216,7 +216,7 @@ static int crypto_report(struct sk_buff *in_skb, struct nlmsghdr *in_nlh,
 	if (err)
 		return err;
 
-	return nlmsg_unicast(crypto_nlsk, skb, NETLINK_CB(in_skb).pid);
+	return nlmsg_unicast(crypto_nlsk, skb, NETLINK_CB(in_skb).portid);
 }
 
 static int crypto_dump_report(struct sk_buff *skb, struct netlink_callback *cb)
diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c
index b4f67b55ef79..266af7b38ebc 100644
--- a/drivers/net/team/team.c
+++ b/drivers/net/team/team.c
@@ -1886,7 +1886,7 @@ static int team_nl_cmd_noop(struct sk_buff *skb, struct genl_info *info)
 	if (!msg)
 		return -ENOMEM;
 
-	hdr = genlmsg_put(msg, info->snd_pid, info->snd_seq,
+	hdr = genlmsg_put(msg, info->snd_portid, info->snd_seq,
 			  &team_nl_family, 0, TEAM_CMD_NOOP);
 	if (IS_ERR(hdr)) {
 		err = PTR_ERR(hdr);
@@ -1895,7 +1895,7 @@ static int team_nl_cmd_noop(struct sk_buff *skb, struct genl_info *info)
 
 	genlmsg_end(msg, hdr);
 
-	return genlmsg_unicast(genl_info_net(info), msg, info->snd_pid);
+	return genlmsg_unicast(genl_info_net(info), msg, info->snd_portid);
 
 err_msg_put:
 	nlmsg_free(msg);
@@ -1952,7 +1952,7 @@ static int team_nl_send_generic(struct genl_info *info, struct team *team,
 	if (err < 0)
 		goto err_fill;
 
-	err = genlmsg_unicast(genl_info_net(info), skb, info->snd_pid);
+	err = genlmsg_unicast(genl_info_net(info), skb, info->snd_portid);
 	return err;
 
 err_fill:
@@ -1961,11 +1961,11 @@ err_fill:
 }
 
 typedef int team_nl_send_func_t(struct sk_buff *skb,
-				struct team *team, u32 pid);
+				struct team *team, u32 portid);
 
-static int team_nl_send_unicast(struct sk_buff *skb, struct team *team, u32 pid)
+static int team_nl_send_unicast(struct sk_buff *skb, struct team *team, u32 portid)
 {
-	return genlmsg_unicast(dev_net(team->dev), skb, pid);
+	return genlmsg_unicast(dev_net(team->dev), skb, portid);
 }
 
 static int team_nl_fill_one_option_get(struct sk_buff *skb, struct team *team,
@@ -2050,13 +2050,13 @@ nest_cancel:
 }
 
 static int __send_and_alloc_skb(struct sk_buff **pskb,
-				struct team *team, u32 pid,
+				struct team *team, u32 portid,
 				team_nl_send_func_t *send_func)
 {
 	int err;
 
 	if (*pskb) {
-		err = send_func(*pskb, team, pid);
+		err = send_func(*pskb, team, portid);
 		if (err)
 			return err;
 	}
@@ -2066,7 +2066,7 @@ static int __send_and_alloc_skb(struct sk_buff **pskb,
 	return 0;
 }
 
-static int team_nl_send_options_get(struct team *team, u32 pid, u32 seq,
+static int team_nl_send_options_get(struct team *team, u32 portid, u32 seq,
 				    int flags, team_nl_send_func_t *send_func,
 				    struct list_head *sel_opt_inst_list)
 {
@@ -2083,11 +2083,11 @@ static int team_nl_send_options_get(struct team *team, u32 pid, u32 seq,
 				    struct team_option_inst, tmp_list);
 
 start_again:
-	err = __send_and_alloc_skb(&skb, team, pid, send_func);
+	err = __send_and_alloc_skb(&skb, team, portid, send_func);
 	if (err)
 		return err;
 
-	hdr = genlmsg_put(skb, pid, seq, &team_nl_family, flags | NLM_F_MULTI,
+	hdr = genlmsg_put(skb, portid, seq, &team_nl_family, flags | NLM_F_MULTI,
 			  TEAM_CMD_OPTIONS_GET);
 	if (IS_ERR(hdr))
 		return PTR_ERR(hdr);
@@ -2120,15 +2120,15 @@ start_again:
 		goto start_again;
 
 send_done:
-	nlh = nlmsg_put(skb, pid, seq, NLMSG_DONE, 0, flags | NLM_F_MULTI);
+	nlh = nlmsg_put(skb, portid, seq, NLMSG_DONE, 0, flags | NLM_F_MULTI);
 	if (!nlh) {
-		err = __send_and_alloc_skb(&skb, team, pid, send_func);
+		err = __send_and_alloc_skb(&skb, team, portid, send_func);
 		if (err)
 			goto errout;
 		goto send_done;
 	}
 
-	return send_func(skb, team, pid);
+	return send_func(skb, team, portid);
 
 nla_put_failure:
 	err = -EMSGSIZE;
@@ -2151,7 +2151,7 @@ static int team_nl_cmd_options_get(struct sk_buff *skb, struct genl_info *info)
 
 	list_for_each_entry(opt_inst, &team->option_inst_list, list)
 		list_add_tail(&opt_inst->tmp_list, &sel_opt_inst_list);
-	err = team_nl_send_options_get(team, info->snd_pid, info->snd_seq,
+	err = team_nl_send_options_get(team, info->snd_portid, info->snd_seq,
 				       NLM_F_ACK, team_nl_send_unicast,
 				       &sel_opt_inst_list);
 
@@ -2305,7 +2305,7 @@ team_put:
 }
 
 static int team_nl_fill_port_list_get(struct sk_buff *skb,
-				      u32 pid, u32 seq, int flags,
+				      u32 portid, u32 seq, int flags,
 				      struct team *team,
 				      bool fillall)
 {
@@ -2313,7 +2313,7 @@ static int team_nl_fill_port_list_get(struct sk_buff *skb,
 	void *hdr;
 	struct team_port *port;
 
-	hdr = genlmsg_put(skb, pid, seq, &team_nl_family, flags,
+	hdr = genlmsg_put(skb, portid, seq, &team_nl_family, flags,
 			  TEAM_CMD_PORT_LIST_GET);
 	if (IS_ERR(hdr))
 		return PTR_ERR(hdr);
@@ -2362,7 +2362,7 @@ static int team_nl_fill_port_list_get_all(struct sk_buff *skb,
 					  struct genl_info *info, int flags,
 					  struct team *team)
 {
-	return team_nl_fill_port_list_get(skb, info->snd_pid,
+	return team_nl_fill_port_list_get(skb, info->snd_portid,
 					  info->snd_seq, NLM_F_ACK,
 					  team, true);
 }
@@ -2415,7 +2415,7 @@ static struct genl_multicast_group team_change_event_mcgrp = {
 };
 
 static int team_nl_send_multicast(struct sk_buff *skb,
-				  struct team *team, u32 pid)
+				  struct team *team, u32 portid)
 {
 	return genlmsg_multicast_netns(dev_net(team->dev), skb, 0,
 				       team_change_event_mcgrp.id, GFP_KERNEL);
diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c
index 72b0456e41bf..9d45b3bb974c 100644
--- a/drivers/net/wireless/mac80211_hwsim.c
+++ b/drivers/net/wireless/mac80211_hwsim.c
@@ -38,7 +38,7 @@ MODULE_AUTHOR("Jouni Malinen");
 MODULE_DESCRIPTION("Software simulator of 802.11 radio(s) for mac80211");
 MODULE_LICENSE("GPL");
 
-static u32 wmediumd_pid;
+static u32 wmediumd_portid;
 
 static int radios = 2;
 module_param(radios, int, 0444);
@@ -545,7 +545,7 @@ static bool mac80211_hwsim_addr_match(struct mac80211_hwsim_data *data,
 
 static void mac80211_hwsim_tx_frame_nl(struct ieee80211_hw *hw,
 				       struct sk_buff *my_skb,
-				       int dst_pid)
+				       int dst_portid)
 {
 	struct sk_buff *skb;
 	struct mac80211_hwsim_data *data = hw->priv;
@@ -619,7 +619,7 @@ static void mac80211_hwsim_tx_frame_nl(struct ieee80211_hw *hw,
 		goto nla_put_failure;
 
 	genlmsg_end(skb, msg_head);
-	genlmsg_unicast(&init_net, skb, dst_pid);
+	genlmsg_unicast(&init_net, skb, dst_portid);
 
 	/* Enqueue the packet */
 	skb_queue_tail(&data->pending, my_skb);
@@ -715,7 +715,7 @@ static void mac80211_hwsim_tx(struct ieee80211_hw *hw,
 {
 	bool ack;
 	struct ieee80211_tx_info *txi;
-	u32 _pid;
+	u32 _portid;
 
 	mac80211_hwsim_monitor_rx(hw, skb);
 
@@ -726,10 +726,10 @@ static void mac80211_hwsim_tx(struct ieee80211_hw *hw,
 	}
 
 	/* wmediumd mode check */
-	_pid = ACCESS_ONCE(wmediumd_pid);
+	_portid = ACCESS_ONCE(wmediumd_portid);
 
-	if (_pid)
-		return mac80211_hwsim_tx_frame_nl(hw, skb, _pid);
+	if (_portid)
+		return mac80211_hwsim_tx_frame_nl(hw, skb, _portid);
 
 	/* NO wmediumd detected, perfect medium simulation */
 	ack = mac80211_hwsim_tx_frame_no_nl(hw, skb);
@@ -814,7 +814,7 @@ static void mac80211_hwsim_beacon_tx(void *arg, u8 *mac,
 	struct ieee80211_hw *hw = arg;
 	struct sk_buff *skb;
 	struct ieee80211_tx_info *info;
-	u32 _pid;
+	u32 _portid;
 
 	hwsim_check_magic(vif);
 
@@ -831,10 +831,10 @@ static void mac80211_hwsim_beacon_tx(void *arg, u8 *mac,
 	mac80211_hwsim_monitor_rx(hw, skb);
 
 	/* wmediumd mode check */
-	_pid = ACCESS_ONCE(wmediumd_pid);
+	_portid = ACCESS_ONCE(wmediumd_portid);
 
-	if (_pid)
-		return mac80211_hwsim_tx_frame_nl(hw, skb, _pid);
+	if (_portid)
+		return mac80211_hwsim_tx_frame_nl(hw, skb, _portid);
 
 	mac80211_hwsim_tx_frame_no_nl(hw, skb);
 	dev_kfree_skb(skb);
@@ -1315,7 +1315,7 @@ static void hwsim_send_ps_poll(void *dat, u8 *mac, struct ieee80211_vif *vif)
 	struct hwsim_vif_priv *vp = (void *)vif->drv_priv;
 	struct sk_buff *skb;
 	struct ieee80211_pspoll *pspoll;
-	u32 _pid;
+	u32 _portid;
 
 	if (!vp->assoc)
 		return;
@@ -1336,10 +1336,10 @@ static void hwsim_send_ps_poll(void *dat, u8 *mac, struct ieee80211_vif *vif)
 	memcpy(pspoll->ta, mac, ETH_ALEN);
 
 	/* wmediumd mode check */
-	_pid = ACCESS_ONCE(wmediumd_pid);
+	_portid = ACCESS_ONCE(wmediumd_portid);
 
-	if (_pid)
-		return mac80211_hwsim_tx_frame_nl(data->hw, skb, _pid);
+	if (_portid)
+		return mac80211_hwsim_tx_frame_nl(data->hw, skb, _portid);
 
 	if (!mac80211_hwsim_tx_frame_no_nl(data->hw, skb))
 		printk(KERN_DEBUG "%s: PS-poll frame not ack'ed\n", __func__);
@@ -1353,7 +1353,7 @@ static void hwsim_send_nullfunc(struct mac80211_hwsim_data *data, u8 *mac,
 	struct hwsim_vif_priv *vp = (void *)vif->drv_priv;
 	struct sk_buff *skb;
 	struct ieee80211_hdr *hdr;
-	u32 _pid;
+	u32 _portid;
 
 	if (!vp->assoc)
 		return;
@@ -1375,10 +1375,10 @@ static void hwsim_send_nullfunc(struct mac80211_hwsim_data *data, u8 *mac,
 	memcpy(hdr->addr3, vp->bssid, ETH_ALEN);
 
 	/* wmediumd mode check */
-	_pid = ACCESS_ONCE(wmediumd_pid);
+	_portid = ACCESS_ONCE(wmediumd_portid);
 
-	if (_pid)
-		return mac80211_hwsim_tx_frame_nl(data->hw, skb, _pid);
+	if (_portid)
+		return mac80211_hwsim_tx_frame_nl(data->hw, skb, _portid);
 
 	if (!mac80211_hwsim_tx_frame_no_nl(data->hw, skb))
 		printk(KERN_DEBUG "%s: nullfunc frame not ack'ed\n", __func__);
@@ -1632,10 +1632,10 @@ static int hwsim_register_received_nl(struct sk_buff *skb_2,
 	if (info == NULL)
 		goto out;
 
-	wmediumd_pid = info->snd_pid;
+	wmediumd_portid = info->snd_portid;
 
 	printk(KERN_DEBUG "mac80211_hwsim: received a REGISTER, "
-	       "switching to wmediumd mode with pid %d\n", info->snd_pid);
+	       "switching to wmediumd mode with pid %d\n", info->snd_portid);
 
 	return 0;
 out:
@@ -1672,10 +1672,10 @@ static int mac80211_hwsim_netlink_notify(struct notifier_block *nb,
 	if (state != NETLINK_URELEASE)
 		return NOTIFY_DONE;
 
-	if (notify->pid == wmediumd_pid) {
+	if (notify->portid == wmediumd_portid) {
 		printk(KERN_INFO "mac80211_hwsim: wmediumd released netlink"
 		       " socket, switching to perfect channel medium\n");
-		wmediumd_pid = 0;
+		wmediumd_portid = 0;
 	}
 	return NOTIFY_DONE;
 
diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c
index 519bd5303f3f..31969f2e13ce 100644
--- a/drivers/scsi/scsi_transport_iscsi.c
+++ b/drivers/scsi/scsi_transport_iscsi.c
@@ -2119,7 +2119,7 @@ iscsi_if_recv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, uint32_t *group)
 	switch (nlh->nlmsg_type) {
 	case ISCSI_UEVENT_CREATE_SESSION:
 		err = iscsi_if_create_session(priv, ep, ev,
-					      NETLINK_CB(skb).pid,
+					      NETLINK_CB(skb).portid,
 					      ev->u.c_session.initial_cmdsn,
 					      ev->u.c_session.cmds_max,
 					      ev->u.c_session.queue_depth);
@@ -2132,7 +2132,7 @@ iscsi_if_recv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, uint32_t *group)
 		}
 
 		err = iscsi_if_create_session(priv, ep, ev,
-					NETLINK_CB(skb).pid,
+					NETLINK_CB(skb).portid,
 					ev->u.c_bound_session.initial_cmdsn,
 					ev->u.c_bound_session.cmds_max,
 					ev->u.c_bound_session.queue_depth);
diff --git a/drivers/staging/gdm72xx/netlink_k.c b/drivers/staging/gdm72xx/netlink_k.c
index 2109cab0a14c..20d0aec52e72 100644
--- a/drivers/staging/gdm72xx/netlink_k.c
+++ b/drivers/staging/gdm72xx/netlink_k.c
@@ -135,7 +135,7 @@ int netlink_send(struct sock *sock, int group, u16 type, void *msg, int len)
 	}
 	memcpy(nlmsg_data(nlh), msg, len);
 
-	NETLINK_CB(skb).pid = 0;
+	NETLINK_CB(skb).portid = 0;
 	NETLINK_CB(skb).dst_group = 0;
 
 	ret = netlink_broadcast(sock, skb, 0, group+1, GFP_ATOMIC);
diff --git a/fs/dlm/netlink.c b/fs/dlm/netlink.c
index ef17e0169da1..60a327863b11 100644
--- a/fs/dlm/netlink.c
+++ b/fs/dlm/netlink.c
@@ -14,7 +14,7 @@
 #include "dlm_internal.h"
 
 static uint32_t dlm_nl_seqnum;
-static uint32_t listener_nlpid;
+static uint32_t listener_nlportid;
 
 static struct genl_family family = {
 	.id		= GENL_ID_GENERATE,
@@ -64,13 +64,13 @@ static int send_data(struct sk_buff *skb)
 		return rv;
 	}
 
-	return genlmsg_unicast(&init_net, skb, listener_nlpid);
+	return genlmsg_unicast(&init_net, skb, listener_nlportid);
 }
 
 static int user_cmd(struct sk_buff *skb, struct genl_info *info)
 {
-	listener_nlpid = info->snd_pid;
-	printk("user_cmd nlpid %u\n", listener_nlpid);
+	listener_nlportid = info->snd_portid;
+	printk("user_cmd nlpid %u\n", listener_nlportid);
 	return 0;
 }
 
diff --git a/include/linux/netlink.h b/include/linux/netlink.h
index cd17dda5a987..73ade5fbc856 100644
--- a/include/linux/netlink.h
+++ b/include/linux/netlink.h
@@ -165,7 +165,7 @@ static inline struct nlmsghdr *nlmsg_hdr(const struct sk_buff *skb)
 
 struct netlink_skb_parms {
 	struct scm_creds	creds;		/* Skb credentials	*/
-	__u32			pid;
+	__u32			portid;
 	__u32			dst_group;
 	struct sock		*ssk;
 };
@@ -205,14 +205,14 @@ extern void __netlink_clear_multicast_users(struct sock *sk, unsigned int group)
 extern void netlink_clear_multicast_users(struct sock *sk, unsigned int group);
 extern void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err);
 extern int netlink_has_listeners(struct sock *sk, unsigned int group);
-extern int netlink_unicast(struct sock *ssk, struct sk_buff *skb, __u32 pid, int nonblock);
-extern int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, __u32 pid,
+extern int netlink_unicast(struct sock *ssk, struct sk_buff *skb, __u32 portid, int nonblock);
+extern int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, __u32 portid,
 			     __u32 group, gfp_t allocation);
 extern int netlink_broadcast_filtered(struct sock *ssk, struct sk_buff *skb,
-	__u32 pid, __u32 group, gfp_t allocation,
+	__u32 portid, __u32 group, gfp_t allocation,
 	int (*filter)(struct sock *dsk, struct sk_buff *skb, void *data),
 	void *filter_data);
-extern int netlink_set_err(struct sock *ssk, __u32 pid, __u32 group, int code);
+extern int netlink_set_err(struct sock *ssk, __u32 portid, __u32 group, int code);
 extern int netlink_register_notifier(struct notifier_block *nb);
 extern int netlink_unregister_notifier(struct notifier_block *nb);
 
@@ -253,12 +253,12 @@ struct netlink_callback {
 
 struct netlink_notify {
 	struct net *net;
-	int pid;
+	int portid;
 	int protocol;
 };
 
 struct nlmsghdr *
-__nlmsg_put(struct sk_buff *skb, u32 pid, u32 seq, int type, int len, int flags);
+__nlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, int type, int len, int flags);
 
 struct netlink_dump_control {
 	int (*dump)(struct sk_buff *skb, struct netlink_callback *);
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index ba2e6160fad1..7b3b0568d289 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -2458,7 +2458,7 @@ struct wireless_dev {
 
 	int beacon_interval;
 
-	u32 ap_unexpected_nlpid;
+	u32 ap_unexpected_nlportid;
 
 #ifdef CONFIG_CFG80211_WEXT
 	/* wext data */
diff --git a/include/net/genetlink.h b/include/net/genetlink.h
index 48905cd3884c..bdfbe68c1c3b 100644
--- a/include/net/genetlink.h
+++ b/include/net/genetlink.h
@@ -65,7 +65,7 @@ struct genl_family {
 /**
  * struct genl_info - receiving information
  * @snd_seq: sending sequence number
- * @snd_pid: netlink pid of sender
+ * @snd_portid: netlink portid of sender
  * @nlhdr: netlink message header
  * @genlhdr: generic netlink message header
  * @userhdr: user specific header
@@ -75,7 +75,7 @@ struct genl_family {
  */
 struct genl_info {
 	u32			snd_seq;
-	u32			snd_pid;
+	u32			snd_portid;
 	struct nlmsghdr *	nlhdr;
 	struct genlmsghdr *	genlhdr;
 	void *			userhdr;
@@ -130,10 +130,10 @@ extern int genl_register_mc_group(struct genl_family *family,
 				  struct genl_multicast_group *grp);
 extern void genl_unregister_mc_group(struct genl_family *family,
 				     struct genl_multicast_group *grp);
-extern void genl_notify(struct sk_buff *skb, struct net *net, u32 pid,
+extern void genl_notify(struct sk_buff *skb, struct net *net, u32 portid,
 			u32 group, struct nlmsghdr *nlh, gfp_t flags);
 
-void *genlmsg_put(struct sk_buff *skb, u32 pid, u32 seq,
+void *genlmsg_put(struct sk_buff *skb, u32 portid, u32 seq,
 				struct genl_family *family, int flags, u8 cmd);
 
 /**
@@ -183,7 +183,7 @@ static inline void *genlmsg_put_reply(struct sk_buff *skb,
 				      struct genl_family *family,
 				      int flags, u8 cmd)
 {
-	return genlmsg_put(skb, info->snd_pid, info->snd_seq, family,
+	return genlmsg_put(skb, info->snd_portid, info->snd_seq, family,
 			   flags, cmd);
 }
 
@@ -212,49 +212,49 @@ static inline void genlmsg_cancel(struct sk_buff *skb, void *hdr)
  * genlmsg_multicast_netns - multicast a netlink message to a specific netns
  * @net: the net namespace
  * @skb: netlink message as socket buffer
- * @pid: own netlink pid to avoid sending to yourself
+ * @portid: own netlink portid to avoid sending to yourself
  * @group: multicast group id
  * @flags: allocation flags
  */
 static inline int genlmsg_multicast_netns(struct net *net, struct sk_buff *skb,
-					  u32 pid, unsigned int group, gfp_t flags)
+					  u32 portid, unsigned int group, gfp_t flags)
 {
-	return nlmsg_multicast(net->genl_sock, skb, pid, group, flags);
+	return nlmsg_multicast(net->genl_sock, skb, portid, group, flags);
 }
 
 /**
  * genlmsg_multicast - multicast a netlink message to the default netns
  * @skb: netlink message as socket buffer
- * @pid: own netlink pid to avoid sending to yourself
+ * @portid: own netlink portid to avoid sending to yourself
  * @group: multicast group id
  * @flags: allocation flags
  */
-static inline int genlmsg_multicast(struct sk_buff *skb, u32 pid,
+static inline int genlmsg_multicast(struct sk_buff *skb, u32 portid,
 				    unsigned int group, gfp_t flags)
 {
-	return genlmsg_multicast_netns(&init_net, skb, pid, group, flags);
+	return genlmsg_multicast_netns(&init_net, skb, portid, group, flags);
 }
 
 /**
  * genlmsg_multicast_allns - multicast a netlink message to all net namespaces
  * @skb: netlink message as socket buffer
- * @pid: own netlink pid to avoid sending to yourself
+ * @portid: own netlink portid to avoid sending to yourself
  * @group: multicast group id
  * @flags: allocation flags
  *
  * This function must hold the RTNL or rcu_read_lock().
  */
-int genlmsg_multicast_allns(struct sk_buff *skb, u32 pid,
+int genlmsg_multicast_allns(struct sk_buff *skb, u32 portid,
 			    unsigned int group, gfp_t flags);
 
 /**
  * genlmsg_unicast - unicast a netlink message
  * @skb: netlink message as socket buffer
- * @pid: netlink pid of the destination socket
+ * @portid: netlink portid of the destination socket
  */
-static inline int genlmsg_unicast(struct net *net, struct sk_buff *skb, u32 pid)
+static inline int genlmsg_unicast(struct net *net, struct sk_buff *skb, u32 portid)
 {
-	return nlmsg_unicast(net->genl_sock, skb, pid);
+	return nlmsg_unicast(net->genl_sock, skb, portid);
 }
 
 /**
@@ -264,7 +264,7 @@ static inline int genlmsg_unicast(struct net *net, struct sk_buff *skb, u32 pid)
  */
 static inline int genlmsg_reply(struct sk_buff *skb, struct genl_info *info)
 {
-	return genlmsg_unicast(genl_info_net(info), skb, info->snd_pid);
+	return genlmsg_unicast(genl_info_net(info), skb, info->snd_portid);
 }
 
 /**
diff --git a/include/net/netfilter/nf_conntrack_ecache.h b/include/net/netfilter/nf_conntrack_ecache.h
index 4a045cda9c60..5654d292efd4 100644
--- a/include/net/netfilter/nf_conntrack_ecache.h
+++ b/include/net/netfilter/nf_conntrack_ecache.h
@@ -17,7 +17,7 @@ struct nf_conntrack_ecache {
 	unsigned long missed;	/* missed events */
 	u16 ctmask;		/* bitmask of ct events to be delivered */
 	u16 expmask;		/* bitmask of expect events to be delivered */
-	u32 pid;		/* netlink pid of destroyer */
+	u32 portid;		/* netlink portid of destroyer */
 	struct timer_list timeout;
 };
 
@@ -60,7 +60,7 @@ nf_ct_ecache_ext_add(struct nf_conn *ct, u16 ctmask, u16 expmask, gfp_t gfp)
 /* This structure is passed to event handler */
 struct nf_ct_event {
 	struct nf_conn *ct;
-	u32 pid;
+	u32 portid;
 	int report;
 };
 
@@ -92,7 +92,7 @@ nf_conntrack_event_cache(enum ip_conntrack_events event, struct nf_conn *ct)
 static inline int
 nf_conntrack_eventmask_report(unsigned int eventmask,
 			      struct nf_conn *ct,
-			      u32 pid,
+			      u32 portid,
 			      int report)
 {
 	int ret = 0;
@@ -112,11 +112,11 @@ nf_conntrack_eventmask_report(unsigned int eventmask,
 	if (nf_ct_is_confirmed(ct) && !nf_ct_is_dying(ct)) {
 		struct nf_ct_event item = {
 			.ct 	= ct,
-			.pid	= e->pid ? e->pid : pid,
+			.portid	= e->portid ? e->portid : portid,
 			.report = report
 		};
 		/* This is a resent of a destroy event? If so, skip missed */
-		unsigned long missed = e->pid ? 0 : e->missed;
+		unsigned long missed = e->portid ? 0 : e->missed;
 
 		if (!((eventmask | missed) & e->ctmask))
 			goto out_unlock;
@@ -126,11 +126,11 @@ nf_conntrack_eventmask_report(unsigned int eventmask,
 			spin_lock_bh(&ct->lock);
 			if (ret < 0) {
 				/* This is a destroy event that has been
-				 * triggered by a process, we store the PID
+				 * triggered by a process, we store the PORTID
 				 * to include it in the retransmission. */
 				if (eventmask & (1 << IPCT_DESTROY) &&
-				    e->pid == 0 && pid != 0)
-					e->pid = pid;
+				    e->portid == 0 && portid != 0)
+					e->portid = portid;
 				else
 					e->missed |= eventmask;
 			} else
@@ -145,9 +145,9 @@ out_unlock:
 
 static inline int
 nf_conntrack_event_report(enum ip_conntrack_events event, struct nf_conn *ct,
-			  u32 pid, int report)
+			  u32 portid, int report)
 {
-	return nf_conntrack_eventmask_report(1 << event, ct, pid, report);
+	return nf_conntrack_eventmask_report(1 << event, ct, portid, report);
 }
 
 static inline int
@@ -158,7 +158,7 @@ nf_conntrack_event(enum ip_conntrack_events event, struct nf_conn *ct)
 
 struct nf_exp_event {
 	struct nf_conntrack_expect *exp;
-	u32 pid;
+	u32 portid;
 	int report;
 };
 
@@ -172,7 +172,7 @@ extern void nf_ct_expect_unregister_notifier(struct net *net, struct nf_exp_even
 static inline void
 nf_ct_expect_event_report(enum ip_conntrack_expect_events event,
 			  struct nf_conntrack_expect *exp,
-			  u32 pid,
+			  u32 portid,
 			  int report)
 {
 	struct net *net = nf_ct_exp_net(exp);
@@ -191,7 +191,7 @@ nf_ct_expect_event_report(enum ip_conntrack_expect_events event,
 	if (e->expmask & (1 << event)) {
 		struct nf_exp_event item = {
 			.exp	= exp,
-			.pid	= pid,
+			.portid	= portid,
 			.report = report
 		};
 		notify->fcn(1 << event, &item);
@@ -216,20 +216,20 @@ static inline void nf_conntrack_event_cache(enum ip_conntrack_events event,
 					    struct nf_conn *ct) {}
 static inline int nf_conntrack_eventmask_report(unsigned int eventmask,
 						struct nf_conn *ct,
-						u32 pid,
+						u32 portid,
 						int report) { return 0; }
 static inline int nf_conntrack_event(enum ip_conntrack_events event,
 				     struct nf_conn *ct) { return 0; }
 static inline int nf_conntrack_event_report(enum ip_conntrack_events event,
 					    struct nf_conn *ct,
-					    u32 pid,
+					    u32 portid,
 					    int report) { return 0; }
 static inline void nf_ct_deliver_cached_events(const struct nf_conn *ct) {}
 static inline void nf_ct_expect_event(enum ip_conntrack_expect_events event,
 				      struct nf_conntrack_expect *exp) {}
 static inline void nf_ct_expect_event_report(enum ip_conntrack_expect_events e,
 					     struct nf_conntrack_expect *exp,
- 					     u32 pid,
+ 					     u32 portid,
  					     int report) {}
 
 static inline int nf_conntrack_ecache_init(struct net *net)
diff --git a/include/net/netlink.h b/include/net/netlink.h
index 09175d5d1fbf..9690b0f6698a 100644
--- a/include/net/netlink.h
+++ b/include/net/netlink.h
@@ -217,19 +217,19 @@ struct nla_policy {
 /**
  * struct nl_info - netlink source information
  * @nlh: Netlink message header of original request
- * @pid: Netlink PID of requesting application
+ * @portid: Netlink PORTID of requesting application
  */
 struct nl_info {
 	struct nlmsghdr		*nlh;
 	struct net		*nl_net;
-	u32			pid;
+	u32			portid;
 };
 
 extern int		netlink_rcv_skb(struct sk_buff *skb,
 					int (*cb)(struct sk_buff *,
 						  struct nlmsghdr *));
 extern int		nlmsg_notify(struct sock *sk, struct sk_buff *skb,
-				     u32 pid, unsigned int group, int report,
+				     u32 portid, unsigned int group, int report,
 				     gfp_t flags);
 
 extern int		nla_validate(const struct nlattr *head,
@@ -444,7 +444,7 @@ static inline int nlmsg_report(const struct nlmsghdr *nlh)
 /**
  * nlmsg_put - Add a new netlink message to an skb
  * @skb: socket buffer to store message in
- * @pid: netlink process id
+ * @portid: netlink process id
  * @seq: sequence number of message
  * @type: message type
  * @payload: length of message payload
@@ -453,13 +453,13 @@ static inline int nlmsg_report(const struct nlmsghdr *nlh)
  * Returns NULL if the tailroom of the skb is insufficient to store
  * the message header and payload.
  */
-static inline struct nlmsghdr *nlmsg_put(struct sk_buff *skb, u32 pid, u32 seq,
+static inline struct nlmsghdr *nlmsg_put(struct sk_buff *skb, u32 portid, u32 seq,
 					 int type, int payload, int flags)
 {
 	if (unlikely(skb_tailroom(skb) < nlmsg_total_size(payload)))
 		return NULL;
 
-	return __nlmsg_put(skb, pid, seq, type, payload, flags);
+	return __nlmsg_put(skb, portid, seq, type, payload, flags);
 }
 
 /**
@@ -478,7 +478,7 @@ static inline struct nlmsghdr *nlmsg_put_answer(struct sk_buff *skb,
 						int type, int payload,
 						int flags)
 {
-	return nlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
+	return nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
 			 type, payload, flags);
 }
 
@@ -563,18 +563,18 @@ static inline void nlmsg_free(struct sk_buff *skb)
  * nlmsg_multicast - multicast a netlink message
  * @sk: netlink socket to spread messages to
  * @skb: netlink message as socket buffer
- * @pid: own netlink pid to avoid sending to yourself
+ * @portid: own netlink portid to avoid sending to yourself
  * @group: multicast group id
  * @flags: allocation flags
  */
 static inline int nlmsg_multicast(struct sock *sk, struct sk_buff *skb,
-				  u32 pid, unsigned int group, gfp_t flags)
+				  u32 portid, unsigned int group, gfp_t flags)
 {
 	int err;
 
 	NETLINK_CB(skb).dst_group = group;
 
-	err = netlink_broadcast(sk, skb, pid, group, flags);
+	err = netlink_broadcast(sk, skb, portid, group, flags);
 	if (err > 0)
 		err = 0;
 
@@ -585,13 +585,13 @@ static inline int nlmsg_multicast(struct sock *sk, struct sk_buff *skb,
  * nlmsg_unicast - unicast a netlink message
  * @sk: netlink socket to spread message to
  * @skb: netlink message as socket buffer
- * @pid: netlink pid of the destination socket
+ * @portid: netlink portid of the destination socket
  */
-static inline int nlmsg_unicast(struct sock *sk, struct sk_buff *skb, u32 pid)
+static inline int nlmsg_unicast(struct sock *sk, struct sk_buff *skb, u32 portid)
 {
 	int err;
 
-	err = netlink_unicast(sk, skb, pid, MSG_DONTWAIT);
+	err = netlink_unicast(sk, skb, portid, MSG_DONTWAIT);
 	if (err > 0)
 		err = 0;
 
diff --git a/include/net/nfc/nfc.h b/include/net/nfc/nfc.h
index 6431f5e39022..6735909f826d 100644
--- a/include/net/nfc/nfc.h
+++ b/include/net/nfc/nfc.h
@@ -89,7 +89,7 @@ struct nfc_target {
 };
 
 struct nfc_genl_data {
-	u32 poll_req_pid;
+	u32 poll_req_portid;
 	struct mutex genl_data_mutex;
 };
 
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 36ad56ba648b..ee8613f01860 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -263,7 +263,7 @@ struct km_event {
 	} data;
 
 	u32	seq;
-	u32	pid;
+	u32	portid;
 	u32	event;
 	struct net *net;
 };
@@ -310,7 +310,7 @@ extern void km_state_notify(struct xfrm_state *x, const struct km_event *c);
 
 struct xfrm_tmpl;
 extern int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol);
-extern void km_state_expired(struct xfrm_state *x, int hard, u32 pid);
+extern void km_state_expired(struct xfrm_state *x, int hard, u32 portid);
 extern int __xfrm_state_delete(struct xfrm_state *x);
 
 struct xfrm_state_afinfo {
@@ -1554,7 +1554,7 @@ extern int xfrm_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
 #endif
 
 extern int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, __be16 sport);
-extern void km_policy_expired(struct xfrm_policy *pol, int dir, int hard, u32 pid);
+extern void km_policy_expired(struct xfrm_policy *pol, int dir, int hard, u32 portid);
 extern int km_report(struct net *net, u8 proto, struct xfrm_selector *sel, xfrm_address_t *addr);
 
 extern void xfrm_input_init(void);
diff --git a/kernel/audit.c b/kernel/audit.c
index a24aafa850ae..e0cf64a0ae2d 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -87,11 +87,11 @@ static int	audit_failure = AUDIT_FAIL_PRINTK;
 
 /*
  * If audit records are to be written to the netlink socket, audit_pid
- * contains the pid of the auditd process and audit_nlk_pid contains
- * the pid to use to send netlink messages to that process.
+ * contains the pid of the auditd process and audit_nlk_portid contains
+ * the portid to use to send netlink messages to that process.
  */
 int		audit_pid;
-static int	audit_nlk_pid;
+static int	audit_nlk_portid;
 
 /* If audit_rate_limit is non-zero, limit the rate of sending audit records
  * to that number per second.  This prevents DoS attacks, but results in
@@ -401,7 +401,7 @@ static void kauditd_send_skb(struct sk_buff *skb)
 	int err;
 	/* take a reference in case we can't send it and we want to hold it */
 	skb_get(skb);
-	err = netlink_unicast(audit_sock, skb, audit_nlk_pid, 0);
+	err = netlink_unicast(audit_sock, skb, audit_nlk_portid, 0);
 	if (err < 0) {
 		BUG_ON(err != -ECONNREFUSED); /* Shouldn't happen */
 		printk(KERN_ERR "audit: *NO* daemon at audit_pid=%d\n", audit_pid);
@@ -692,7 +692,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 		status_set.backlog_limit = audit_backlog_limit;
 		status_set.lost		 = atomic_read(&audit_lost);
 		status_set.backlog	 = skb_queue_len(&audit_skb_queue);
-		audit_send_reply(NETLINK_CB(skb).pid, seq, AUDIT_GET, 0, 0,
+		audit_send_reply(NETLINK_CB(skb).portid, seq, AUDIT_GET, 0, 0,
 				 &status_set, sizeof(status_set));
 		break;
 	case AUDIT_SET:
@@ -720,7 +720,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 							sessionid, sid, 1);
 
 			audit_pid = new_pid;
-			audit_nlk_pid = NETLINK_CB(skb).pid;
+			audit_nlk_portid = NETLINK_CB(skb).portid;
 		}
 		if (status_get->mask & AUDIT_STATUS_RATE_LIMIT) {
 			err = audit_set_rate_limit(status_get->rate_limit,
@@ -782,7 +782,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 		}
 		/* fallthrough */
 	case AUDIT_LIST:
-		err = audit_receive_filter(msg_type, NETLINK_CB(skb).pid,
+		err = audit_receive_filter(msg_type, NETLINK_CB(skb).portid,
 					   uid, seq, data, nlmsg_len(nlh),
 					   loginuid, sessionid, sid);
 		break;
@@ -801,7 +801,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 		}
 		/* fallthrough */
 	case AUDIT_LIST_RULES:
-		err = audit_receive_filter(msg_type, NETLINK_CB(skb).pid,
+		err = audit_receive_filter(msg_type, NETLINK_CB(skb).portid,
 					   uid, seq, data, nlmsg_len(nlh),
 					   loginuid, sessionid, sid);
 		break;
@@ -872,7 +872,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 			memcpy(sig_data->ctx, ctx, len);
 			security_release_secctx(ctx, len);
 		}
-		audit_send_reply(NETLINK_CB(skb).pid, seq, AUDIT_SIGNAL_INFO,
+		audit_send_reply(NETLINK_CB(skb).portid, seq, AUDIT_SIGNAL_INFO,
 				0, 0, sig_data, sizeof(*sig_data) + len);
 		kfree(sig_data);
 		break;
@@ -891,7 +891,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 		rcu_read_unlock();
 
 		if (!err)
-			audit_send_reply(NETLINK_CB(skb).pid, seq,
+			audit_send_reply(NETLINK_CB(skb).portid, seq,
 					 AUDIT_TTY_GET, 0, 0, &s, sizeof(s));
 		break;
 	}
diff --git a/kernel/taskstats.c b/kernel/taskstats.c
index d0a32796550f..123793cd06f9 100644
--- a/kernel/taskstats.c
+++ b/kernel/taskstats.c
@@ -467,7 +467,7 @@ static int cmd_attr_register_cpumask(struct genl_info *info)
 	rc = parse(info->attrs[TASKSTATS_CMD_ATTR_REGISTER_CPUMASK], mask);
 	if (rc < 0)
 		goto out;
-	rc = add_del_listener(info->snd_pid, mask, REGISTER);
+	rc = add_del_listener(info->snd_portid, mask, REGISTER);
 out:
 	free_cpumask_var(mask);
 	return rc;
@@ -483,7 +483,7 @@ static int cmd_attr_deregister_cpumask(struct genl_info *info)
 	rc = parse(info->attrs[TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK], mask);
 	if (rc < 0)
 		goto out;
-	rc = add_del_listener(info->snd_pid, mask, DEREGISTER);
+	rc = add_del_listener(info->snd_portid, mask, DEREGISTER);
 out:
 	free_cpumask_var(mask);
 	return rc;
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index 9ce430b4657c..ddf93efc133c 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -467,14 +467,14 @@ static int fdb_to_nud(const struct net_bridge_fdb_entry *fdb)
 
 static int fdb_fill_info(struct sk_buff *skb, const struct net_bridge *br,
 			 const struct net_bridge_fdb_entry *fdb,
-			 u32 pid, u32 seq, int type, unsigned int flags)
+			 u32 portid, u32 seq, int type, unsigned int flags)
 {
 	unsigned long now = jiffies;
 	struct nda_cacheinfo ci;
 	struct nlmsghdr *nlh;
 	struct ndmsg *ndm;
 
-	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
+	nlh = nlmsg_put(skb, portid, seq, type, sizeof(*ndm), flags);
 	if (nlh == NULL)
 		return -EMSGSIZE;
 
@@ -555,7 +555,7 @@ int br_fdb_dump(struct sk_buff *skb,
 				goto skip;
 
 			if (fdb_fill_info(skb, br, f,
-					  NETLINK_CB(cb->skb).pid,
+					  NETLINK_CB(cb->skb).portid,
 					  cb->nlh->nlmsg_seq,
 					  RTM_NEWNEIGH,
 					  NLM_F_MULTI) < 0)
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index fe41260fbf38..093f527276a3 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -127,7 +127,7 @@ static int br_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
 			goto skip;
 
 		if (br_fill_ifinfo(skb, port,
-				   NETLINK_CB(cb->skb).pid,
+				   NETLINK_CB(cb->skb).portid,
 				   cb->nlh->nlmsg_seq, RTM_NEWLINK,
 				   NLM_F_MULTI) < 0)
 			break;
diff --git a/net/can/gw.c b/net/can/gw.c
index b54d5e695b03..127879c55fb6 100644
--- a/net/can/gw.c
+++ b/net/can/gw.c
@@ -549,7 +549,7 @@ static int cgw_dump_jobs(struct sk_buff *skb, struct netlink_callback *cb)
 		if (idx < s_idx)
 			goto cont;
 
-		if (cgw_put_job(skb, gwj, RTM_NEWROUTE, NETLINK_CB(cb->skb).pid,
+		if (cgw_put_job(skb, gwj, RTM_NEWROUTE, NETLINK_CB(cb->skb).portid,
 		    cb->nlh->nlmsg_seq, NLM_F_MULTI) < 0)
 			break;
 cont:
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index ab7db83236c9..58a4ba27dfe3 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -402,7 +402,7 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 	if (unresolved)
 		ops->unresolved_rules++;
 
-	notify_rule_change(RTM_NEWRULE, rule, ops, nlh, NETLINK_CB(skb).pid);
+	notify_rule_change(RTM_NEWRULE, rule, ops, nlh, NETLINK_CB(skb).portid);
 	flush_route_cache(ops);
 	rules_ops_put(ops);
 	return 0;
@@ -500,7 +500,7 @@ static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 		}
 
 		notify_rule_change(RTM_DELRULE, rule, ops, nlh,
-				   NETLINK_CB(skb).pid);
+				   NETLINK_CB(skb).portid);
 		if (ops->delete)
 			ops->delete(rule);
 		fib_rule_put(rule);
@@ -601,7 +601,7 @@ static int dump_rules(struct sk_buff *skb, struct netlink_callback *cb,
 		if (idx < cb->args[1])
 			goto skip;
 
-		if (fib_nl_fill_rule(skb, rule, NETLINK_CB(cb->skb).pid,
+		if (fib_nl_fill_rule(skb, rule, NETLINK_CB(cb->skb).portid,
 				     cb->nlh->nlmsg_seq, RTM_NEWRULE,
 				     NLM_F_MULTI, ops) < 0)
 			break;
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 117afaf51268..c160adb38e5a 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -2102,7 +2102,7 @@ static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
 		if (tidx < tbl_skip || (family && tbl->family != family))
 			continue;
 
-		if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).pid,
+		if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).portid,
 				       cb->nlh->nlmsg_seq, RTM_NEWNEIGHTBL,
 				       NLM_F_MULTI) <= 0)
 			break;
@@ -2115,7 +2115,7 @@ static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
 				goto next;
 
 			if (neightbl_fill_param_info(skb, tbl, p,
-						     NETLINK_CB(cb->skb).pid,
+						     NETLINK_CB(cb->skb).portid,
 						     cb->nlh->nlmsg_seq,
 						     RTM_NEWNEIGHTBL,
 						     NLM_F_MULTI) <= 0)
@@ -2244,7 +2244,7 @@ static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
 				continue;
 			if (idx < s_idx)
 				goto next;
-			if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).pid,
+			if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
 					    cb->nlh->nlmsg_seq,
 					    RTM_NEWNEIGH,
 					    NLM_F_MULTI) <= 0) {
@@ -2281,7 +2281,7 @@ static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
 				continue;
 			if (idx < s_idx)
 				goto next;
-			if (pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).pid,
+			if (pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
 					    cb->nlh->nlmsg_seq,
 					    RTM_NEWNEIGH,
 					    NLM_F_MULTI, tbl) <= 0) {
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 508c5df4a09c..92575370d9f0 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1081,7 +1081,7 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
 			if (idx < s_idx)
 				goto cont;
 			if (rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK,
-					     NETLINK_CB(cb->skb).pid,
+					     NETLINK_CB(cb->skb).portid,
 					     cb->nlh->nlmsg_seq, 0,
 					     NLM_F_MULTI,
 					     ext_filter_mask) <= 0)
@@ -1899,14 +1899,14 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 	if (nskb == NULL)
 		return -ENOBUFS;
 
-	err = rtnl_fill_ifinfo(nskb, dev, RTM_NEWLINK, NETLINK_CB(skb).pid,
+	err = rtnl_fill_ifinfo(nskb, dev, RTM_NEWLINK, NETLINK_CB(skb).portid,
 			       nlh->nlmsg_seq, 0, 0, ext_filter_mask);
 	if (err < 0) {
 		/* -EMSGSIZE implies BUG in if_nlmsg_size */
 		WARN_ON(err == -EMSGSIZE);
 		kfree_skb(nskb);
 	} else
-		err = rtnl_unicast(nskb, net, NETLINK_CB(skb).pid);
+		err = rtnl_unicast(nskb, net, NETLINK_CB(skb).portid);
 
 	return err;
 }
@@ -2180,9 +2180,9 @@ static int nlmsg_populate_fdb(struct sk_buff *skb,
 {
 	struct netdev_hw_addr *ha;
 	int err;
-	u32 pid, seq;
+	u32 portid, seq;
 
-	pid = NETLINK_CB(cb->skb).pid;
+	portid = NETLINK_CB(cb->skb).portid;
 	seq = cb->nlh->nlmsg_seq;
 
 	list_for_each_entry(ha, &list->list, list) {
@@ -2190,7 +2190,7 @@ static int nlmsg_populate_fdb(struct sk_buff *skb,
 			goto skip;
 
 		err = nlmsg_populate_fdb_fill(skb, dev, ha->addr,
-					      pid, seq, 0, NTF_SELF);
+					      portid, seq, 0, NTF_SELF);
 		if (err < 0)
 			return err;
 skip:
diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c
index 81f2bb62dea3..70989e672304 100644
--- a/net/dcb/dcbnl.c
+++ b/net/dcb/dcbnl.c
@@ -1319,7 +1319,7 @@ nla_put_failure:
 }
 
 static int dcbnl_notify(struct net_device *dev, int event, int cmd,
-			u32 seq, u32 pid, int dcbx_ver)
+			u32 seq, u32 portid, int dcbx_ver)
 {
 	struct net *net = dev_net(dev);
 	struct sk_buff *skb;
@@ -1330,7 +1330,7 @@ static int dcbnl_notify(struct net_device *dev, int event, int cmd,
 	if (!ops)
 		return -EOPNOTSUPP;
 
-	skb = dcbnl_newmsg(event, cmd, pid, seq, 0, &nlh);
+	skb = dcbnl_newmsg(event, cmd, portid, seq, 0, &nlh);
 	if (!skb)
 		return -ENOBUFS;
 
@@ -1353,16 +1353,16 @@ static int dcbnl_notify(struct net_device *dev, int event, int cmd,
 }
 
 int dcbnl_ieee_notify(struct net_device *dev, int event, int cmd,
-		      u32 seq, u32 pid)
+		      u32 seq, u32 portid)
 {
-	return dcbnl_notify(dev, event, cmd, seq, pid, DCB_CAP_DCBX_VER_IEEE);
+	return dcbnl_notify(dev, event, cmd, seq, portid, DCB_CAP_DCBX_VER_IEEE);
 }
 EXPORT_SYMBOL(dcbnl_ieee_notify);
 
 int dcbnl_cee_notify(struct net_device *dev, int event, int cmd,
-		     u32 seq, u32 pid)
+		     u32 seq, u32 portid)
 {
-	return dcbnl_notify(dev, event, cmd, seq, pid, DCB_CAP_DCBX_VER_CEE);
+	return dcbnl_notify(dev, event, cmd, seq, portid, DCB_CAP_DCBX_VER_CEE);
 }
 EXPORT_SYMBOL(dcbnl_cee_notify);
 
@@ -1656,7 +1656,7 @@ static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 	struct net_device *netdev;
 	struct dcbmsg *dcb = nlmsg_data(nlh);
 	struct nlattr *tb[DCB_ATTR_MAX + 1];
-	u32 pid = skb ? NETLINK_CB(skb).pid : 0;
+	u32 portid = skb ? NETLINK_CB(skb).portid : 0;
 	int ret = -EINVAL;
 	struct sk_buff *reply_skb;
 	struct nlmsghdr *reply_nlh = NULL;
@@ -1690,7 +1690,7 @@ static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 		goto out;
 	}
 
-	reply_skb = dcbnl_newmsg(fn->type, dcb->cmd, pid, nlh->nlmsg_seq,
+	reply_skb = dcbnl_newmsg(fn->type, dcb->cmd, portid, nlh->nlmsg_seq,
 				 nlh->nlmsg_flags, &reply_nlh);
 	if (!reply_skb) {
 		ret = -ENOBUFS;
@@ -1705,7 +1705,7 @@ static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 
 	nlmsg_end(reply_skb, reply_nlh);
 
-	ret = rtnl_unicast(reply_skb, &init_net, pid);
+	ret = rtnl_unicast(reply_skb, &init_net, portid);
 out:
 	dev_put(netdev);
 	return ret;
diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c
index f3924ab1e019..7b7e561412d3 100644
--- a/net/decnet/dn_dev.c
+++ b/net/decnet/dn_dev.c
@@ -667,12 +667,12 @@ static inline size_t dn_ifaddr_nlmsg_size(void)
 }
 
 static int dn_nl_fill_ifaddr(struct sk_buff *skb, struct dn_ifaddr *ifa,
-			     u32 pid, u32 seq, int event, unsigned int flags)
+			     u32 portid, u32 seq, int event, unsigned int flags)
 {
 	struct ifaddrmsg *ifm;
 	struct nlmsghdr *nlh;
 
-	nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), flags);
+	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags);
 	if (nlh == NULL)
 		return -EMSGSIZE;
 
@@ -753,7 +753,7 @@ static int dn_nl_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
 			if (dn_idx < skip_naddr)
 				continue;
 
-			if (dn_nl_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).pid,
+			if (dn_nl_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).portid,
 					      cb->nlh->nlmsg_seq, RTM_NEWADDR,
 					      NLM_F_MULTI) < 0)
 				goto done;
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index c855e8d0738f..b57419cc41a4 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -1543,7 +1543,7 @@ static int dn_route_input(struct sk_buff *skb)
 	return dn_route_input_slow(skb);
 }
 
-static int dn_rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
+static int dn_rt_fill_info(struct sk_buff *skb, u32 portid, u32 seq,
 			   int event, int nowait, unsigned int flags)
 {
 	struct dn_route *rt = (struct dn_route *)skb_dst(skb);
@@ -1551,7 +1551,7 @@ static int dn_rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
 	struct nlmsghdr *nlh;
 	long expires;
 
-	nlh = nlmsg_put(skb, pid, seq, event, sizeof(*r), flags);
+	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*r), flags);
 	if (!nlh)
 		return -EMSGSIZE;
 
@@ -1685,7 +1685,7 @@ static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void
 	if (rtm->rtm_flags & RTM_F_NOTIFY)
 		rt->rt_flags |= RTCF_NOTIFY;
 
-	err = dn_rt_fill_info(skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq, RTM_NEWROUTE, 0, 0);
+	err = dn_rt_fill_info(skb, NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, RTM_NEWROUTE, 0, 0);
 
 	if (err == 0)
 		goto out_free;
@@ -1694,7 +1694,7 @@ static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void
 		goto out_free;
 	}
 
-	return rtnl_unicast(skb, &init_net, NETLINK_CB(in_skb).pid);
+	return rtnl_unicast(skb, &init_net, NETLINK_CB(in_skb).portid);
 
 out_free:
 	kfree_skb(skb);
@@ -1737,7 +1737,7 @@ int dn_cache_dump(struct sk_buff *skb, struct netlink_callback *cb)
 			if (idx < s_idx)
 				continue;
 			skb_dst_set(skb, dst_clone(&rt->dst));
-			if (dn_rt_fill_info(skb, NETLINK_CB(cb->skb).pid,
+			if (dn_rt_fill_info(skb, NETLINK_CB(cb->skb).portid,
 					cb->nlh->nlmsg_seq, RTM_NEWROUTE,
 					1, NLM_F_MULTI) <= 0) {
 				skb_dst_drop(skb);
diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c
index 16c986ab1228..f968c1b58f47 100644
--- a/net/decnet/dn_table.c
+++ b/net/decnet/dn_table.c
@@ -291,14 +291,14 @@ static inline size_t dn_fib_nlmsg_size(struct dn_fib_info *fi)
 	return payload;
 }
 
-static int dn_fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
+static int dn_fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event,
 			u32 tb_id, u8 type, u8 scope, void *dst, int dst_len,
 			struct dn_fib_info *fi, unsigned int flags)
 {
 	struct rtmsg *rtm;
 	struct nlmsghdr *nlh;
 
-	nlh = nlmsg_put(skb, pid, seq, event, sizeof(*rtm), flags);
+	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*rtm), flags);
 	if (!nlh)
 		return -EMSGSIZE;
 
@@ -374,14 +374,14 @@ static void dn_rtmsg_fib(int event, struct dn_fib_node *f, int z, u32 tb_id,
 			struct nlmsghdr *nlh, struct netlink_skb_parms *req)
 {
 	struct sk_buff *skb;
-	u32 pid = req ? req->pid : 0;
+	u32 portid = req ? req->portid : 0;
 	int err = -ENOBUFS;
 
 	skb = nlmsg_new(dn_fib_nlmsg_size(DN_FIB_INFO(f)), GFP_KERNEL);
 	if (skb == NULL)
 		goto errout;
 
-	err = dn_fib_dump_info(skb, pid, nlh->nlmsg_seq, event, tb_id,
+	err = dn_fib_dump_info(skb, portid, nlh->nlmsg_seq, event, tb_id,
 			       f->fn_type, f->fn_scope, &f->fn_key, z,
 			       DN_FIB_INFO(f), 0);
 	if (err < 0) {
@@ -390,7 +390,7 @@ static void dn_rtmsg_fib(int event, struct dn_fib_node *f, int z, u32 tb_id,
 		kfree_skb(skb);
 		goto errout;
 	}
-	rtnl_notify(skb, &init_net, pid, RTNLGRP_DECnet_ROUTE, nlh, GFP_KERNEL);
+	rtnl_notify(skb, &init_net, portid, RTNLGRP_DECnet_ROUTE, nlh, GFP_KERNEL);
 	return;
 errout:
 	if (err < 0)
@@ -411,7 +411,7 @@ static __inline__ int dn_hash_dump_bucket(struct sk_buff *skb,
 			continue;
 		if (f->fn_state & DN_S_ZOMBIE)
 			continue;
-		if (dn_fib_dump_info(skb, NETLINK_CB(cb->skb).pid,
+		if (dn_fib_dump_info(skb, NETLINK_CB(cb->skb).portid,
 				cb->nlh->nlmsg_seq,
 				RTM_NEWROUTE,
 				tb->n,
diff --git a/net/ieee802154/nl-mac.c b/net/ieee802154/nl-mac.c
index 1e9917124e75..96bb08abece2 100644
--- a/net/ieee802154/nl-mac.c
+++ b/net/ieee802154/nl-mac.c
@@ -246,7 +246,7 @@ nla_put_failure:
 }
 EXPORT_SYMBOL(ieee802154_nl_start_confirm);
 
-static int ieee802154_nl_fill_iface(struct sk_buff *msg, u32 pid,
+static int ieee802154_nl_fill_iface(struct sk_buff *msg, u32 portid,
 	u32 seq, int flags, struct net_device *dev)
 {
 	void *hdr;
@@ -534,7 +534,7 @@ static int ieee802154_list_iface(struct sk_buff *skb,
 	if (!msg)
 		goto out_dev;
 
-	rc = ieee802154_nl_fill_iface(msg, info->snd_pid, info->snd_seq,
+	rc = ieee802154_nl_fill_iface(msg, info->snd_portid, info->snd_seq,
 			0, dev);
 	if (rc < 0)
 		goto out_free;
@@ -565,7 +565,7 @@ static int ieee802154_dump_iface(struct sk_buff *skb,
 		if (idx < s_idx || (dev->type != ARPHRD_IEEE802154))
 			goto cont;
 
-		if (ieee802154_nl_fill_iface(skb, NETLINK_CB(cb->skb).pid,
+		if (ieee802154_nl_fill_iface(skb, NETLINK_CB(cb->skb).portid,
 			cb->nlh->nlmsg_seq, NLM_F_MULTI, dev) < 0)
 			break;
 cont:
diff --git a/net/ieee802154/nl-phy.c b/net/ieee802154/nl-phy.c
index d54be34cca94..22b1a7058fd3 100644
--- a/net/ieee802154/nl-phy.c
+++ b/net/ieee802154/nl-phy.c
@@ -35,7 +35,7 @@
 
 #include "ieee802154.h"
 
-static int ieee802154_nl_fill_phy(struct sk_buff *msg, u32 pid,
+static int ieee802154_nl_fill_phy(struct sk_buff *msg, u32 portid,
 	u32 seq, int flags, struct wpan_phy *phy)
 {
 	void *hdr;
@@ -105,7 +105,7 @@ static int ieee802154_list_phy(struct sk_buff *skb,
 	if (!msg)
 		goto out_dev;
 
-	rc = ieee802154_nl_fill_phy(msg, info->snd_pid, info->snd_seq,
+	rc = ieee802154_nl_fill_phy(msg, info->snd_portid, info->snd_seq,
 			0, phy);
 	if (rc < 0)
 		goto out_free;
@@ -138,7 +138,7 @@ static int ieee802154_dump_phy_iter(struct wpan_phy *phy, void *_data)
 		return 0;
 
 	rc = ieee802154_nl_fill_phy(data->skb,
-			NETLINK_CB(data->cb->skb).pid,
+			NETLINK_CB(data->cb->skb).portid,
 			data->cb->nlh->nlmsg_seq,
 			NLM_F_MULTI,
 			phy);
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index f14eff506743..7b00556e184b 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -311,7 +311,7 @@ int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
 }
 
 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
-			 int destroy, struct nlmsghdr *nlh, u32 pid)
+			 int destroy, struct nlmsghdr *nlh, u32 portid)
 {
 	struct in_ifaddr *promote = NULL;
 	struct in_ifaddr *ifa, *ifa1 = *ifap;
@@ -345,7 +345,7 @@ static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
 				inet_hash_remove(ifa);
 				*ifap1 = ifa->ifa_next;
 
-				rtmsg_ifa(RTM_DELADDR, ifa, nlh, pid);
+				rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
 				blocking_notifier_call_chain(&inetaddr_chain,
 						NETDEV_DOWN, ifa);
 				inet_free_ifa(ifa);
@@ -382,7 +382,7 @@ static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
 	   is valid, it will try to restore deleted routes... Grr.
 	   So that, this order is correct.
 	 */
-	rtmsg_ifa(RTM_DELADDR, ifa1, nlh, pid);
+	rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
 	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
 
 	if (promote) {
@@ -395,7 +395,7 @@ static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
 		}
 
 		promote->ifa_flags &= ~IFA_F_SECONDARY;
-		rtmsg_ifa(RTM_NEWADDR, promote, nlh, pid);
+		rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
 		blocking_notifier_call_chain(&inetaddr_chain,
 				NETDEV_UP, promote);
 		for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
@@ -417,7 +417,7 @@ static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
 }
 
 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
-			     u32 pid)
+			     u32 portid)
 {
 	struct in_device *in_dev = ifa->ifa_dev;
 	struct in_ifaddr *ifa1, **ifap, **last_primary;
@@ -464,7 +464,7 @@ static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
 	/* Send message first, then call notifier.
 	   Notifier will trigger FIB update, so that
 	   listeners of netlink will know about new ifaddr */
-	rtmsg_ifa(RTM_NEWADDR, ifa, nlh, pid);
+	rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
 	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
 
 	return 0;
@@ -563,7 +563,7 @@ static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg
 		    !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
 			continue;
 
-		__inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).pid);
+		__inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
 		return 0;
 	}
 
@@ -649,7 +649,7 @@ static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg
 	if (IS_ERR(ifa))
 		return PTR_ERR(ifa);
 
-	return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).pid);
+	return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid);
 }
 
 /*
@@ -1246,12 +1246,12 @@ static size_t inet_nlmsg_size(void)
 }
 
 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
-			    u32 pid, u32 seq, int event, unsigned int flags)
+			    u32 portid, u32 seq, int event, unsigned int flags)
 {
 	struct ifaddrmsg *ifm;
 	struct nlmsghdr  *nlh;
 
-	nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), flags);
+	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags);
 	if (nlh == NULL)
 		return -EMSGSIZE;
 
@@ -1313,7 +1313,7 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
 				if (ip_idx < s_ip_idx)
 					continue;
 				if (inet_fill_ifaddr(skb, ifa,
-					     NETLINK_CB(cb->skb).pid,
+					     NETLINK_CB(cb->skb).portid,
 					     cb->nlh->nlmsg_seq,
 					     RTM_NEWADDR, NLM_F_MULTI) <= 0) {
 					rcu_read_unlock();
@@ -1335,7 +1335,7 @@ done:
 }
 
 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
-		      u32 pid)
+		      u32 portid)
 {
 	struct sk_buff *skb;
 	u32 seq = nlh ? nlh->nlmsg_seq : 0;
@@ -1347,14 +1347,14 @@ static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
 	if (skb == NULL)
 		goto errout;
 
-	err = inet_fill_ifaddr(skb, ifa, pid, seq, event, 0);
+	err = inet_fill_ifaddr(skb, ifa, portid, seq, event, 0);
 	if (err < 0) {
 		/* -EMSGSIZE implies BUG in inet_nlmsg_size() */
 		WARN_ON(err == -EMSGSIZE);
 		kfree_skb(skb);
 		goto errout;
 	}
-	rtnl_notify(skb, net, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
+	rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
 	return;
 errout:
 	if (err < 0)
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index dc1f10ed1872..68c93d1bb03a 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -557,7 +557,7 @@ static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
 	cfg->fc_flags = rtm->rtm_flags;
 	cfg->fc_nlflags = nlh->nlmsg_flags;
 
-	cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
+	cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
 	cfg->fc_nlinfo.nlh = nlh;
 	cfg->fc_nlinfo.nl_net = net;
 
@@ -955,7 +955,7 @@ static void nl_fib_input(struct sk_buff *skb)
 	struct fib_result_nl *frn;
 	struct nlmsghdr *nlh;
 	struct fib_table *tb;
-	u32 pid;
+	u32 portid;
 
 	net = sock_net(skb->sk);
 	nlh = nlmsg_hdr(skb);
@@ -973,10 +973,10 @@ static void nl_fib_input(struct sk_buff *skb)
 
 	nl_fib_lookup(frn, tb);
 
-	pid = NETLINK_CB(skb).pid;      /* pid of sending process */
-	NETLINK_CB(skb).pid = 0;        /* from kernel */
+	portid = NETLINK_CB(skb).portid;      /* pid of sending process */
+	NETLINK_CB(skb).portid = 0;        /* from kernel */
 	NETLINK_CB(skb).dst_group = 0;  /* unicast */
-	netlink_unicast(net->ipv4.fibnl, skb, pid, MSG_DONTWAIT);
+	netlink_unicast(net->ipv4.fibnl, skb, portid, MSG_DONTWAIT);
 }
 
 static int __net_init nl_fib_lookup_init(struct net *net)
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index da80dc14cc76..3509065e409a 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -391,7 +391,7 @@ void rtmsg_fib(int event, __be32 key, struct fib_alias *fa,
 	if (skb == NULL)
 		goto errout;
 
-	err = fib_dump_info(skb, info->pid, seq, event, tb_id,
+	err = fib_dump_info(skb, info->portid, seq, event, tb_id,
 			    fa->fa_type, key, dst_len,
 			    fa->fa_tos, fa->fa_info, nlm_flags);
 	if (err < 0) {
@@ -400,7 +400,7 @@ void rtmsg_fib(int event, __be32 key, struct fib_alias *fa,
 		kfree_skb(skb);
 		goto errout;
 	}
-	rtnl_notify(skb, info->nl_net, info->pid, RTNLGRP_IPV4_ROUTE,
+	rtnl_notify(skb, info->nl_net, info->portid, RTNLGRP_IPV4_ROUTE,
 		    info->nlh, GFP_KERNEL);
 	return;
 errout:
@@ -989,14 +989,14 @@ failure:
 	return ERR_PTR(err);
 }
 
-int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
+int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event,
 		  u32 tb_id, u8 type, __be32 dst, int dst_len, u8 tos,
 		  struct fib_info *fi, unsigned int flags)
 {
 	struct nlmsghdr *nlh;
 	struct rtmsg *rtm;
 
-	nlh = nlmsg_put(skb, pid, seq, event, sizeof(*rtm), flags);
+	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*rtm), flags);
 	if (nlh == NULL)
 		return -EMSGSIZE;
 
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 8d766106b540..31d771ca9a70 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1873,7 +1873,7 @@ static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah,
 			continue;
 		}
 
-		if (fib_dump_info(skb, NETLINK_CB(cb->skb).pid,
+		if (fib_dump_info(skb, NETLINK_CB(cb->skb).portid,
 				  cb->nlh->nlmsg_seq,
 				  RTM_NEWROUTE,
 				  tb->tb_id,
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 8bc005b1435f..535584c00f91 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -70,7 +70,7 @@ static inline void inet_diag_unlock_handler(
 int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
 			      struct sk_buff *skb, struct inet_diag_req_v2 *req,
 			      struct user_namespace *user_ns,		      	
-			      u32 pid, u32 seq, u16 nlmsg_flags,
+			      u32 portid, u32 seq, u16 nlmsg_flags,
 			      const struct nlmsghdr *unlh)
 {
 	const struct inet_sock *inet = inet_sk(sk);
@@ -84,7 +84,7 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
 	handler = inet_diag_table[req->sdiag_protocol];
 	BUG_ON(handler == NULL);
 
-	nlh = nlmsg_put(skb, pid, seq, unlh->nlmsg_type, sizeof(*r),
+	nlh = nlmsg_put(skb, portid, seq, unlh->nlmsg_type, sizeof(*r),
 			nlmsg_flags);
 	if (!nlh)
 		return -EMSGSIZE;
@@ -201,23 +201,23 @@ EXPORT_SYMBOL_GPL(inet_sk_diag_fill);
 static int inet_csk_diag_fill(struct sock *sk,
 			      struct sk_buff *skb, struct inet_diag_req_v2 *req,
 			      struct user_namespace *user_ns,
-			      u32 pid, u32 seq, u16 nlmsg_flags,
+			      u32 portid, u32 seq, u16 nlmsg_flags,
 			      const struct nlmsghdr *unlh)
 {
 	return inet_sk_diag_fill(sk, inet_csk(sk),
-			skb, req, user_ns, pid, seq, nlmsg_flags, unlh);
+			skb, req, user_ns, portid, seq, nlmsg_flags, unlh);
 }
 
 static int inet_twsk_diag_fill(struct inet_timewait_sock *tw,
 			       struct sk_buff *skb, struct inet_diag_req_v2 *req,
-			       u32 pid, u32 seq, u16 nlmsg_flags,
+			       u32 portid, u32 seq, u16 nlmsg_flags,
 			       const struct nlmsghdr *unlh)
 {
 	long tmo;
 	struct inet_diag_msg *r;
 	struct nlmsghdr *nlh;
 
-	nlh = nlmsg_put(skb, pid, seq, unlh->nlmsg_type, sizeof(*r),
+	nlh = nlmsg_put(skb, portid, seq, unlh->nlmsg_type, sizeof(*r),
 			nlmsg_flags);
 	if (!nlh)
 		return -EMSGSIZE;
@@ -260,14 +260,14 @@ static int inet_twsk_diag_fill(struct inet_timewait_sock *tw,
 static int sk_diag_fill(struct sock *sk, struct sk_buff *skb,
 			struct inet_diag_req_v2 *r,
 			struct user_namespace *user_ns,
-			u32 pid, u32 seq, u16 nlmsg_flags,
+			u32 portid, u32 seq, u16 nlmsg_flags,
 			const struct nlmsghdr *unlh)
 {
 	if (sk->sk_state == TCP_TIME_WAIT)
 		return inet_twsk_diag_fill((struct inet_timewait_sock *)sk,
-					   skb, r, pid, seq, nlmsg_flags,
+					   skb, r, portid, seq, nlmsg_flags,
 					   unlh);
-	return inet_csk_diag_fill(sk, skb, r, user_ns, pid, seq, nlmsg_flags, unlh);
+	return inet_csk_diag_fill(sk, skb, r, user_ns, portid, seq, nlmsg_flags, unlh);
 }
 
 int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *in_skb,
@@ -316,14 +316,14 @@ int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *in_s
 
 	err = sk_diag_fill(sk, rep, req,
 			   sk_user_ns(NETLINK_CB(in_skb).ssk),
-			   NETLINK_CB(in_skb).pid,
+			   NETLINK_CB(in_skb).portid,
 			   nlh->nlmsg_seq, 0, nlh);
 	if (err < 0) {
 		WARN_ON(err == -EMSGSIZE);
 		nlmsg_free(rep);
 		goto out;
 	}
-	err = netlink_unicast(net->diag_nlsk, rep, NETLINK_CB(in_skb).pid,
+	err = netlink_unicast(net->diag_nlsk, rep, NETLINK_CB(in_skb).portid,
 			      MSG_DONTWAIT);
 	if (err > 0)
 		err = 0;
@@ -557,7 +557,7 @@ static int inet_csk_diag_dump(struct sock *sk,
 
 	return inet_csk_diag_fill(sk, skb, r,
 				  sk_user_ns(NETLINK_CB(cb->skb).ssk),
-				  NETLINK_CB(cb->skb).pid,
+				  NETLINK_CB(cb->skb).portid,
 				  cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh);
 }
 
@@ -592,14 +592,14 @@ static int inet_twsk_diag_dump(struct inet_timewait_sock *tw,
 	}
 
 	return inet_twsk_diag_fill(tw, skb, r,
-				   NETLINK_CB(cb->skb).pid,
+				   NETLINK_CB(cb->skb).portid,
 				   cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh);
 }
 
 static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk,
 			      struct request_sock *req,
 			      struct user_namespace *user_ns,
-			      u32 pid, u32 seq,
+			      u32 portid, u32 seq,
 			      const struct nlmsghdr *unlh)
 {
 	const struct inet_request_sock *ireq = inet_rsk(req);
@@ -608,7 +608,7 @@ static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk,
 	struct nlmsghdr *nlh;
 	long tmo;
 
-	nlh = nlmsg_put(skb, pid, seq, unlh->nlmsg_type, sizeof(*r),
+	nlh = nlmsg_put(skb, portid, seq, unlh->nlmsg_type, sizeof(*r),
 			NLM_F_MULTI);
 	if (!nlh)
 		return -EMSGSIZE;
@@ -711,7 +711,7 @@ static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk,
 
 			err = inet_diag_fill_req(skb, sk, req,
 					       sk_user_ns(NETLINK_CB(cb->skb).ssk),
-					       NETLINK_CB(cb->skb).pid,
+					       NETLINK_CB(cb->skb).portid,
 					       cb->nlh->nlmsg_seq, cb->nlh);
 			if (err < 0) {
 				cb->args[3] = j + 1;
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 8aa7a4cf9139..1daa95c2a0ba 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -626,7 +626,7 @@ static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c)
 			e->error = -ETIMEDOUT;
 			memset(&e->msg, 0, sizeof(e->msg));
 
-			rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
+			rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
 		} else {
 			kfree_skb(skb);
 		}
@@ -870,7 +870,7 @@ static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt,
 				memset(&e->msg, 0, sizeof(e->msg));
 			}
 
-			rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
+			rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
 		} else {
 			ip_mr_forward(net, mrt, skb, c, 0);
 		}
@@ -2117,12 +2117,12 @@ int ipmr_get_route(struct net *net, struct sk_buff *skb,
 }
 
 static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
-			    u32 pid, u32 seq, struct mfc_cache *c)
+			    u32 portid, u32 seq, struct mfc_cache *c)
 {
 	struct nlmsghdr *nlh;
 	struct rtmsg *rtm;
 
-	nlh = nlmsg_put(skb, pid, seq, RTM_NEWROUTE, sizeof(*rtm), NLM_F_MULTI);
+	nlh = nlmsg_put(skb, portid, seq, RTM_NEWROUTE, sizeof(*rtm), NLM_F_MULTI);
 	if (nlh == NULL)
 		return -EMSGSIZE;
 
@@ -2176,7 +2176,7 @@ static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
 				if (e < s_e)
 					goto next_entry;
 				if (ipmr_fill_mroute(mrt, skb,
-						     NETLINK_CB(cb->skb).pid,
+						     NETLINK_CB(cb->skb).portid,
 						     cb->nlh->nlmsg_seq,
 						     mfc) < 0)
 					goto done;
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index d39edf16d607..940f4f4cb201 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2136,7 +2136,7 @@ struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp4,
 EXPORT_SYMBOL_GPL(ip_route_output_flow);
 
 static int rt_fill_info(struct net *net,  __be32 dst, __be32 src,
-			struct flowi4 *fl4, struct sk_buff *skb, u32 pid,
+			struct flowi4 *fl4, struct sk_buff *skb, u32 portid,
 			u32 seq, int event, int nowait, unsigned int flags)
 {
 	struct rtable *rt = skb_rtable(skb);
@@ -2146,7 +2146,7 @@ static int rt_fill_info(struct net *net,  __be32 dst, __be32 src,
 	u32 error;
 	u32 metrics[RTAX_MAX];
 
-	nlh = nlmsg_put(skb, pid, seq, event, sizeof(*r), flags);
+	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*r), flags);
 	if (nlh == NULL)
 		return -EMSGSIZE;
 
@@ -2306,12 +2306,12 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void
 		rt->rt_flags |= RTCF_NOTIFY;
 
 	err = rt_fill_info(net, dst, src, &fl4, skb,
-			   NETLINK_CB(in_skb).pid, nlh->nlmsg_seq,
+			   NETLINK_CB(in_skb).portid, nlh->nlmsg_seq,
 			   RTM_NEWROUTE, 0, 0);
 	if (err <= 0)
 		goto errout_free;
 
-	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
+	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
 errout:
 	return err;
 
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index 988edb63ee73..4c752a6e0bcd 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -803,7 +803,7 @@ static int tcp_metrics_dump_info(struct sk_buff *skb,
 {
 	void *hdr;
 
-	hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
+	hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
 			  &tcp_metrics_nl_family, NLM_F_MULTI,
 			  TCP_METRICS_CMD_GET);
 	if (!hdr)
diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c
index d2f336ea82ca..505b30ad9182 100644
--- a/net/ipv4/udp_diag.c
+++ b/net/ipv4/udp_diag.c
@@ -26,7 +26,7 @@ static int sk_diag_dump(struct sock *sk, struct sk_buff *skb,
 
 	return inet_sk_diag_fill(sk, NULL, skb, req,
 			sk_user_ns(NETLINK_CB(cb->skb).ssk),
-			NETLINK_CB(cb->skb).pid,
+			NETLINK_CB(cb->skb).portid,
 			cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh);
 }
 
@@ -72,14 +72,14 @@ static int udp_dump_one(struct udp_table *tbl, struct sk_buff *in_skb,
 
 	err = inet_sk_diag_fill(sk, NULL, rep, req,
 			   sk_user_ns(NETLINK_CB(in_skb).ssk),
-			   NETLINK_CB(in_skb).pid,
+			   NETLINK_CB(in_skb).portid,
 			   nlh->nlmsg_seq, 0, nlh);
 	if (err < 0) {
 		WARN_ON(err == -EMSGSIZE);
 		kfree_skb(rep);
 		goto out;
 	}
-	err = netlink_unicast(net->diag_nlsk, rep, NETLINK_CB(in_skb).pid,
+	err = netlink_unicast(net->diag_nlsk, rep, NETLINK_CB(in_skb).portid,
 			      MSG_DONTWAIT);
 	if (err > 0)
 		err = 0;
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 572cb660837b..1237d5d037d8 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -3534,12 +3534,12 @@ static inline int inet6_ifaddr_msgsize(void)
 }
 
 static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa,
-			     u32 pid, u32 seq, int event, unsigned int flags)
+			     u32 portid, u32 seq, int event, unsigned int flags)
 {
 	struct nlmsghdr  *nlh;
 	u32 preferred, valid;
 
-	nlh = nlmsg_put(skb, pid, seq, event, sizeof(struct ifaddrmsg), flags);
+	nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct ifaddrmsg), flags);
 	if (nlh == NULL)
 		return -EMSGSIZE;
 
@@ -3577,7 +3577,7 @@ static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa,
 }
 
 static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca,
-				u32 pid, u32 seq, int event, u16 flags)
+				u32 portid, u32 seq, int event, u16 flags)
 {
 	struct nlmsghdr  *nlh;
 	u8 scope = RT_SCOPE_UNIVERSE;
@@ -3586,7 +3586,7 @@ static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca,
 	if (ipv6_addr_scope(&ifmca->mca_addr) & IFA_SITE)
 		scope = RT_SCOPE_SITE;
 
-	nlh = nlmsg_put(skb, pid, seq, event, sizeof(struct ifaddrmsg), flags);
+	nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct ifaddrmsg), flags);
 	if (nlh == NULL)
 		return -EMSGSIZE;
 
@@ -3602,7 +3602,7 @@ static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca,
 }
 
 static int inet6_fill_ifacaddr(struct sk_buff *skb, struct ifacaddr6 *ifaca,
-				u32 pid, u32 seq, int event, unsigned int flags)
+				u32 portid, u32 seq, int event, unsigned int flags)
 {
 	struct nlmsghdr  *nlh;
 	u8 scope = RT_SCOPE_UNIVERSE;
@@ -3611,7 +3611,7 @@ static int inet6_fill_ifacaddr(struct sk_buff *skb, struct ifacaddr6 *ifaca,
 	if (ipv6_addr_scope(&ifaca->aca_addr) & IFA_SITE)
 		scope = RT_SCOPE_SITE;
 
-	nlh = nlmsg_put(skb, pid, seq, event, sizeof(struct ifaddrmsg), flags);
+	nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct ifaddrmsg), flags);
 	if (nlh == NULL)
 		return -EMSGSIZE;
 
@@ -3652,7 +3652,7 @@ static int in6_dump_addrs(struct inet6_dev *idev, struct sk_buff *skb,
 			if (++ip_idx < s_ip_idx)
 				continue;
 			err = inet6_fill_ifaddr(skb, ifa,
-						NETLINK_CB(cb->skb).pid,
+						NETLINK_CB(cb->skb).portid,
 						cb->nlh->nlmsg_seq,
 						RTM_NEWADDR,
 						NLM_F_MULTI);
@@ -3668,7 +3668,7 @@ static int in6_dump_addrs(struct inet6_dev *idev, struct sk_buff *skb,
 			if (ip_idx < s_ip_idx)
 				continue;
 			err = inet6_fill_ifmcaddr(skb, ifmca,
-						  NETLINK_CB(cb->skb).pid,
+						  NETLINK_CB(cb->skb).portid,
 						  cb->nlh->nlmsg_seq,
 						  RTM_GETMULTICAST,
 						  NLM_F_MULTI);
@@ -3683,7 +3683,7 @@ static int in6_dump_addrs(struct inet6_dev *idev, struct sk_buff *skb,
 			if (ip_idx < s_ip_idx)
 				continue;
 			err = inet6_fill_ifacaddr(skb, ifaca,
-						  NETLINK_CB(cb->skb).pid,
+						  NETLINK_CB(cb->skb).portid,
 						  cb->nlh->nlmsg_seq,
 						  RTM_GETANYCAST,
 						  NLM_F_MULTI);
@@ -3805,7 +3805,7 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh,
 		goto errout_ifa;
 	}
 
-	err = inet6_fill_ifaddr(skb, ifa, NETLINK_CB(in_skb).pid,
+	err = inet6_fill_ifaddr(skb, ifa, NETLINK_CB(in_skb).portid,
 				nlh->nlmsg_seq, RTM_NEWADDR, 0);
 	if (err < 0) {
 		/* -EMSGSIZE implies BUG in inet6_ifaddr_msgsize() */
@@ -3813,7 +3813,7 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh,
 		kfree_skb(skb);
 		goto errout_ifa;
 	}
-	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
+	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
 errout_ifa:
 	in6_ifa_put(ifa);
 errout:
@@ -4015,14 +4015,14 @@ static int inet6_fill_link_af(struct sk_buff *skb, const struct net_device *dev)
 }
 
 static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev,
-			     u32 pid, u32 seq, int event, unsigned int flags)
+			     u32 portid, u32 seq, int event, unsigned int flags)
 {
 	struct net_device *dev = idev->dev;
 	struct ifinfomsg *hdr;
 	struct nlmsghdr *nlh;
 	void *protoinfo;
 
-	nlh = nlmsg_put(skb, pid, seq, event, sizeof(*hdr), flags);
+	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*hdr), flags);
 	if (nlh == NULL)
 		return -EMSGSIZE;
 
@@ -4080,7 +4080,7 @@ static int inet6_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
 			if (!idev)
 				goto cont;
 			if (inet6_fill_ifinfo(skb, idev,
-					      NETLINK_CB(cb->skb).pid,
+					      NETLINK_CB(cb->skb).portid,
 					      cb->nlh->nlmsg_seq,
 					      RTM_NEWLINK, NLM_F_MULTI) <= 0)
 				goto out;
@@ -4128,14 +4128,14 @@ static inline size_t inet6_prefix_nlmsg_size(void)
 }
 
 static int inet6_fill_prefix(struct sk_buff *skb, struct inet6_dev *idev,
-			     struct prefix_info *pinfo, u32 pid, u32 seq,
+			     struct prefix_info *pinfo, u32 portid, u32 seq,
 			     int event, unsigned int flags)
 {
 	struct prefixmsg *pmsg;
 	struct nlmsghdr *nlh;
 	struct prefix_cacheinfo	ci;
 
-	nlh = nlmsg_put(skb, pid, seq, event, sizeof(*pmsg), flags);
+	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*pmsg), flags);
 	if (nlh == NULL)
 		return -EMSGSIZE;
 
diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c
index eb6a63632d3c..0b0171570d17 100644
--- a/net/ipv6/addrlabel.c
+++ b/net/ipv6/addrlabel.c
@@ -470,10 +470,10 @@ static void ip6addrlbl_putmsg(struct nlmsghdr *nlh,
 static int ip6addrlbl_fill(struct sk_buff *skb,
 			   struct ip6addrlbl_entry *p,
 			   u32 lseq,
-			   u32 pid, u32 seq, int event,
+			   u32 portid, u32 seq, int event,
 			   unsigned int flags)
 {
-	struct nlmsghdr *nlh = nlmsg_put(skb, pid, seq, event,
+	struct nlmsghdr *nlh = nlmsg_put(skb, portid, seq, event,
 					 sizeof(struct ifaddrlblmsg), flags);
 	if (!nlh)
 		return -EMSGSIZE;
@@ -503,7 +503,7 @@ static int ip6addrlbl_dump(struct sk_buff *skb, struct netlink_callback *cb)
 		    net_eq(ip6addrlbl_net(p), net)) {
 			if ((err = ip6addrlbl_fill(skb, p,
 						   ip6addrlbl_table.seq,
-						   NETLINK_CB(cb->skb).pid,
+						   NETLINK_CB(cb->skb).portid,
 						   cb->nlh->nlmsg_seq,
 						   RTM_NEWADDRLABEL,
 						   NLM_F_MULTI)) <= 0)
@@ -574,7 +574,7 @@ static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr* nlh,
 	}
 
 	err = ip6addrlbl_fill(skb, p, lseq,
-			      NETLINK_CB(in_skb).pid, nlh->nlmsg_seq,
+			      NETLINK_CB(in_skb).portid, nlh->nlmsg_seq,
 			      RTM_NEWADDRLABEL, 0);
 
 	ip6addrlbl_put(p);
@@ -585,7 +585,7 @@ static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr* nlh,
 		goto out;
 	}
 
-	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
+	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
 out:
 	return err;
 }
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 4532973f0dd4..08ea3f0b6e55 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -838,7 +838,7 @@ static void ip6mr_destroy_unres(struct mr6_table *mrt, struct mfc6_cache *c)
 			nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
 			skb_trim(skb, nlh->nlmsg_len);
 			((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -ETIMEDOUT;
-			rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
+			rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
 		} else
 			kfree_skb(skb);
 	}
@@ -1052,7 +1052,7 @@ static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt,
 				skb_trim(skb, nlh->nlmsg_len);
 				((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -EMSGSIZE;
 			}
-			rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
+			rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
 		} else
 			ip6_mr_forward(net, mrt, skb, c);
 	}
@@ -2202,12 +2202,12 @@ int ip6mr_get_route(struct net *net,
 }
 
 static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
-			     u32 pid, u32 seq, struct mfc6_cache *c)
+			     u32 portid, u32 seq, struct mfc6_cache *c)
 {
 	struct nlmsghdr *nlh;
 	struct rtmsg *rtm;
 
-	nlh = nlmsg_put(skb, pid, seq, RTM_NEWROUTE, sizeof(*rtm), NLM_F_MULTI);
+	nlh = nlmsg_put(skb, portid, seq, RTM_NEWROUTE, sizeof(*rtm), NLM_F_MULTI);
 	if (nlh == NULL)
 		return -EMSGSIZE;
 
@@ -2260,7 +2260,7 @@ static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
 				if (e < s_e)
 					goto next_entry;
 				if (ip6mr_fill_mroute(mrt, skb,
-						      NETLINK_CB(cb->skb).pid,
+						      NETLINK_CB(cb->skb).portid,
 						      cb->nlh->nlmsg_seq,
 						      mfc) < 0)
 					goto done;
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 339d921cf3b6..a81c6790a648 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1874,7 +1874,7 @@ static struct rt6_info *rt6_add_route_info(struct net *net,
 		.fc_dst_len	= prefixlen,
 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
 				  RTF_UP | RTF_PREF(pref),
-		.fc_nlinfo.pid = 0,
+		.fc_nlinfo.portid = 0,
 		.fc_nlinfo.nlh = NULL,
 		.fc_nlinfo.nl_net = net,
 	};
@@ -1924,7 +1924,7 @@ struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
 		.fc_ifindex	= dev->ifindex,
 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
 				  RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
-		.fc_nlinfo.pid = 0,
+		.fc_nlinfo.portid = 0,
 		.fc_nlinfo.nlh = NULL,
 		.fc_nlinfo.nl_net = dev_net(dev),
 	};
@@ -2285,7 +2285,7 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
 	if (rtm->rtm_type == RTN_LOCAL)
 		cfg->fc_flags |= RTF_LOCAL;
 
-	cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
+	cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
 	cfg->fc_nlinfo.nlh = nlh;
 	cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
 
@@ -2376,7 +2376,7 @@ static inline size_t rt6_nlmsg_size(void)
 static int rt6_fill_node(struct net *net,
 			 struct sk_buff *skb, struct rt6_info *rt,
 			 struct in6_addr *dst, struct in6_addr *src,
-			 int iif, int type, u32 pid, u32 seq,
+			 int iif, int type, u32 portid, u32 seq,
 			 int prefix, int nowait, unsigned int flags)
 {
 	struct rtmsg *rtm;
@@ -2392,7 +2392,7 @@ static int rt6_fill_node(struct net *net,
 		}
 	}
 
-	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
+	nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
 	if (!nlh)
 		return -EMSGSIZE;
 
@@ -2537,7 +2537,7 @@ int rt6_dump_route(struct rt6_info *rt, void *p_arg)
 
 	return rt6_fill_node(arg->net,
 		     arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
-		     NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
+		     NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
 		     prefix, 0, NLM_F_MULTI);
 }
 
@@ -2617,14 +2617,14 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void
 	skb_dst_set(skb, &rt->dst);
 
 	err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
-			    RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
+			    RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
 			    nlh->nlmsg_seq, 0, 0, 0);
 	if (err < 0) {
 		kfree_skb(skb);
 		goto errout;
 	}
 
-	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
+	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
 errout:
 	return err;
 }
@@ -2644,14 +2644,14 @@ void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
 		goto errout;
 
 	err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
-				event, info->pid, seq, 0, 0, 0);
+				event, info->portid, seq, 0, 0, 0);
 	if (err < 0) {
 		/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
 		WARN_ON(err == -EMSGSIZE);
 		kfree_skb(skb);
 		goto errout;
 	}
-	rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
+	rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
 		    info->nlh, gfp_any());
 	return;
 errout:
diff --git a/net/irda/irnetlink.c b/net/irda/irnetlink.c
index 6c7c4b92e4f8..c32971269280 100644
--- a/net/irda/irnetlink.c
+++ b/net/irda/irnetlink.c
@@ -100,7 +100,7 @@ static int irda_nl_get_mode(struct sk_buff *skb, struct genl_info *info)
 		goto err_out;
 	}
 
-	hdr = genlmsg_put(msg, info->snd_pid, info->snd_seq,
+	hdr = genlmsg_put(msg, info->snd_portid, info->snd_seq,
 			  &irda_nl_family, 0,  IRDA_NL_CMD_GET_MODE);
 	if (hdr == NULL) {
 		ret = -EMSGSIZE;
diff --git a/net/key/af_key.c b/net/key/af_key.c
index 334f93b8cfcb..2ca7d7f6861c 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -54,7 +54,7 @@ struct pfkey_sock {
 
 	struct {
 		uint8_t		msg_version;
-		uint32_t	msg_pid;
+		uint32_t	msg_portid;
 		int		(*dump)(struct pfkey_sock *sk);
 		void		(*done)(struct pfkey_sock *sk);
 		union {
@@ -1447,7 +1447,7 @@ static int key_notify_sa(struct xfrm_state *x, const struct km_event *c)
 	hdr->sadb_msg_errno = 0;
 	hdr->sadb_msg_reserved = 0;
 	hdr->sadb_msg_seq = c->seq;
-	hdr->sadb_msg_pid = c->pid;
+	hdr->sadb_msg_pid = c->portid;
 
 	pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_ALL, NULL, xs_net(x));
 
@@ -1486,7 +1486,7 @@ static int pfkey_add(struct sock *sk, struct sk_buff *skb, const struct sadb_msg
 	else
 		c.event = XFRM_MSG_UPDSA;
 	c.seq = hdr->sadb_msg_seq;
-	c.pid = hdr->sadb_msg_pid;
+	c.portid = hdr->sadb_msg_pid;
 	km_state_notify(x, &c);
 out:
 	xfrm_state_put(x);
@@ -1523,7 +1523,7 @@ static int pfkey_delete(struct sock *sk, struct sk_buff *skb, const struct sadb_
 		goto out;
 
 	c.seq = hdr->sadb_msg_seq;
-	c.pid = hdr->sadb_msg_pid;
+	c.portid = hdr->sadb_msg_pid;
 	c.event = XFRM_MSG_DELSA;
 	km_state_notify(x, &c);
 out:
@@ -1701,7 +1701,7 @@ static int key_notify_sa_flush(const struct km_event *c)
 	hdr->sadb_msg_satype = pfkey_proto2satype(c->data.proto);
 	hdr->sadb_msg_type = SADB_FLUSH;
 	hdr->sadb_msg_seq = c->seq;
-	hdr->sadb_msg_pid = c->pid;
+	hdr->sadb_msg_pid = c->portid;
 	hdr->sadb_msg_version = PF_KEY_V2;
 	hdr->sadb_msg_errno = (uint8_t) 0;
 	hdr->sadb_msg_len = (sizeof(struct sadb_msg) / sizeof(uint64_t));
@@ -1736,7 +1736,7 @@ static int pfkey_flush(struct sock *sk, struct sk_buff *skb, const struct sadb_m
 
 	c.data.proto = proto;
 	c.seq = hdr->sadb_msg_seq;
-	c.pid = hdr->sadb_msg_pid;
+	c.portid = hdr->sadb_msg_pid;
 	c.event = XFRM_MSG_FLUSHSA;
 	c.net = net;
 	km_state_notify(NULL, &c);
@@ -1764,7 +1764,7 @@ static int dump_sa(struct xfrm_state *x, int count, void *ptr)
 	out_hdr->sadb_msg_errno = 0;
 	out_hdr->sadb_msg_reserved = 0;
 	out_hdr->sadb_msg_seq = count + 1;
-	out_hdr->sadb_msg_pid = pfk->dump.msg_pid;
+	out_hdr->sadb_msg_pid = pfk->dump.msg_portid;
 
 	if (pfk->dump.skb)
 		pfkey_broadcast(pfk->dump.skb, GFP_ATOMIC, BROADCAST_ONE,
@@ -1798,7 +1798,7 @@ static int pfkey_dump(struct sock *sk, struct sk_buff *skb, const struct sadb_ms
 		return -EINVAL;
 
 	pfk->dump.msg_version = hdr->sadb_msg_version;
-	pfk->dump.msg_pid = hdr->sadb_msg_pid;
+	pfk->dump.msg_portid = hdr->sadb_msg_pid;
 	pfk->dump.dump = pfkey_dump_sa;
 	pfk->dump.done = pfkey_dump_sa_done;
 	xfrm_state_walk_init(&pfk->dump.u.state, proto);
@@ -2157,7 +2157,7 @@ static int key_notify_policy(struct xfrm_policy *xp, int dir, const struct km_ev
 		out_hdr->sadb_msg_type = event2poltype(c->event);
 	out_hdr->sadb_msg_errno = 0;
 	out_hdr->sadb_msg_seq = c->seq;
-	out_hdr->sadb_msg_pid = c->pid;
+	out_hdr->sadb_msg_pid = c->portid;
 	pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_ALL, NULL, xp_net(xp));
 	return 0;
 
@@ -2272,7 +2272,7 @@ static int pfkey_spdadd(struct sock *sk, struct sk_buff *skb, const struct sadb_
 		c.event = XFRM_MSG_NEWPOLICY;
 
 	c.seq = hdr->sadb_msg_seq;
-	c.pid = hdr->sadb_msg_pid;
+	c.portid = hdr->sadb_msg_pid;
 
 	km_policy_notify(xp, pol->sadb_x_policy_dir-1, &c);
 	xfrm_pol_put(xp);
@@ -2351,7 +2351,7 @@ static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, const struct sa
 		goto out;
 
 	c.seq = hdr->sadb_msg_seq;
-	c.pid = hdr->sadb_msg_pid;
+	c.portid = hdr->sadb_msg_pid;
 	c.data.byid = 0;
 	c.event = XFRM_MSG_DELPOLICY;
 	km_policy_notify(xp, pol->sadb_x_policy_dir-1, &c);
@@ -2597,7 +2597,7 @@ static int pfkey_spdget(struct sock *sk, struct sk_buff *skb, const struct sadb_
 		if (err)
 			goto out;
 		c.seq = hdr->sadb_msg_seq;
-		c.pid = hdr->sadb_msg_pid;
+		c.portid = hdr->sadb_msg_pid;
 		c.data.byid = 1;
 		c.event = XFRM_MSG_DELPOLICY;
 		km_policy_notify(xp, dir, &c);
@@ -2634,7 +2634,7 @@ static int dump_sp(struct xfrm_policy *xp, int dir, int count, void *ptr)
 	out_hdr->sadb_msg_satype = SADB_SATYPE_UNSPEC;
 	out_hdr->sadb_msg_errno = 0;
 	out_hdr->sadb_msg_seq = count + 1;
-	out_hdr->sadb_msg_pid = pfk->dump.msg_pid;
+	out_hdr->sadb_msg_pid = pfk->dump.msg_portid;
 
 	if (pfk->dump.skb)
 		pfkey_broadcast(pfk->dump.skb, GFP_ATOMIC, BROADCAST_ONE,
@@ -2663,7 +2663,7 @@ static int pfkey_spddump(struct sock *sk, struct sk_buff *skb, const struct sadb
 		return -EBUSY;
 
 	pfk->dump.msg_version = hdr->sadb_msg_version;
-	pfk->dump.msg_pid = hdr->sadb_msg_pid;
+	pfk->dump.msg_portid = hdr->sadb_msg_pid;
 	pfk->dump.dump = pfkey_dump_sp;
 	pfk->dump.done = pfkey_dump_sp_done;
 	xfrm_policy_walk_init(&pfk->dump.u.policy, XFRM_POLICY_TYPE_MAIN);
@@ -2682,7 +2682,7 @@ static int key_notify_policy_flush(const struct km_event *c)
 	hdr = (struct sadb_msg *) skb_put(skb_out, sizeof(struct sadb_msg));
 	hdr->sadb_msg_type = SADB_X_SPDFLUSH;
 	hdr->sadb_msg_seq = c->seq;
-	hdr->sadb_msg_pid = c->pid;
+	hdr->sadb_msg_pid = c->portid;
 	hdr->sadb_msg_version = PF_KEY_V2;
 	hdr->sadb_msg_errno = (uint8_t) 0;
 	hdr->sadb_msg_len = (sizeof(struct sadb_msg) / sizeof(uint64_t));
@@ -2711,7 +2711,7 @@ static int pfkey_spdflush(struct sock *sk, struct sk_buff *skb, const struct sad
 
 	c.data.type = XFRM_POLICY_TYPE_MAIN;
 	c.event = XFRM_MSG_FLUSHPOLICY;
-	c.pid = hdr->sadb_msg_pid;
+	c.portid = hdr->sadb_msg_pid;
 	c.seq = hdr->sadb_msg_seq;
 	c.net = net;
 	km_policy_notify(NULL, 0, &c);
diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c
index d71cd9229a47..6ec3f67ad3f1 100644
--- a/net/l2tp/l2tp_netlink.c
+++ b/net/l2tp/l2tp_netlink.c
@@ -78,7 +78,7 @@ static int l2tp_nl_cmd_noop(struct sk_buff *skb, struct genl_info *info)
 		goto out;
 	}
 
-	hdr = genlmsg_put(msg, info->snd_pid, info->snd_seq,
+	hdr = genlmsg_put(msg, info->snd_portid, info->snd_seq,
 			  &l2tp_nl_family, 0, L2TP_CMD_NOOP);
 	if (IS_ERR(hdr)) {
 		ret = PTR_ERR(hdr);
@@ -87,7 +87,7 @@ static int l2tp_nl_cmd_noop(struct sk_buff *skb, struct genl_info *info)
 
 	genlmsg_end(msg, hdr);
 
-	return genlmsg_unicast(genl_info_net(info), msg, info->snd_pid);
+	return genlmsg_unicast(genl_info_net(info), msg, info->snd_portid);
 
 err_out:
 	nlmsg_free(msg);
@@ -235,7 +235,7 @@ out:
 	return ret;
 }
 
-static int l2tp_nl_tunnel_send(struct sk_buff *skb, u32 pid, u32 seq, int flags,
+static int l2tp_nl_tunnel_send(struct sk_buff *skb, u32 portid, u32 seq, int flags,
 			       struct l2tp_tunnel *tunnel)
 {
 	void *hdr;
@@ -248,7 +248,7 @@ static int l2tp_nl_tunnel_send(struct sk_buff *skb, u32 pid, u32 seq, int flags,
 	struct l2tp_stats stats;
 	unsigned int start;
 
-	hdr = genlmsg_put(skb, pid, seq, &l2tp_nl_family, flags,
+	hdr = genlmsg_put(skb, portid, seq, &l2tp_nl_family, flags,
 			  L2TP_CMD_TUNNEL_GET);
 	if (IS_ERR(hdr))
 		return PTR_ERR(hdr);
@@ -359,12 +359,12 @@ static int l2tp_nl_cmd_tunnel_get(struct sk_buff *skb, struct genl_info *info)
 		goto out;
 	}
 
-	ret = l2tp_nl_tunnel_send(msg, info->snd_pid, info->snd_seq,
+	ret = l2tp_nl_tunnel_send(msg, info->snd_portid, info->snd_seq,
 				  NLM_F_ACK, tunnel);
 	if (ret < 0)
 		goto err_out;
 
-	return genlmsg_unicast(net, msg, info->snd_pid);
+	return genlmsg_unicast(net, msg, info->snd_portid);
 
 err_out:
 	nlmsg_free(msg);
@@ -384,7 +384,7 @@ static int l2tp_nl_cmd_tunnel_dump(struct sk_buff *skb, struct netlink_callback
 		if (tunnel == NULL)
 			goto out;
 
-		if (l2tp_nl_tunnel_send(skb, NETLINK_CB(cb->skb).pid,
+		if (l2tp_nl_tunnel_send(skb, NETLINK_CB(cb->skb).portid,
 					cb->nlh->nlmsg_seq, NLM_F_MULTI,
 					tunnel) <= 0)
 			goto out;
@@ -604,7 +604,7 @@ out:
 	return ret;
 }
 
-static int l2tp_nl_session_send(struct sk_buff *skb, u32 pid, u32 seq, int flags,
+static int l2tp_nl_session_send(struct sk_buff *skb, u32 portid, u32 seq, int flags,
 				struct l2tp_session *session)
 {
 	void *hdr;
@@ -616,7 +616,7 @@ static int l2tp_nl_session_send(struct sk_buff *skb, u32 pid, u32 seq, int flags
 
 	sk = tunnel->sock;
 
-	hdr = genlmsg_put(skb, pid, seq, &l2tp_nl_family, flags, L2TP_CMD_SESSION_GET);
+	hdr = genlmsg_put(skb, portid, seq, &l2tp_nl_family, flags, L2TP_CMD_SESSION_GET);
 	if (IS_ERR(hdr))
 		return PTR_ERR(hdr);
 
@@ -705,12 +705,12 @@ static int l2tp_nl_cmd_session_get(struct sk_buff *skb, struct genl_info *info)
 		goto out;
 	}
 
-	ret = l2tp_nl_session_send(msg, info->snd_pid, info->snd_seq,
+	ret = l2tp_nl_session_send(msg, info->snd_portid, info->snd_seq,
 				   0, session);
 	if (ret < 0)
 		goto err_out;
 
-	return genlmsg_unicast(genl_info_net(info), msg, info->snd_pid);
+	return genlmsg_unicast(genl_info_net(info), msg, info->snd_portid);
 
 err_out:
 	nlmsg_free(msg);
@@ -742,7 +742,7 @@ static int l2tp_nl_cmd_session_dump(struct sk_buff *skb, struct netlink_callback
 			continue;
 		}
 
-		if (l2tp_nl_session_send(skb, NETLINK_CB(cb->skb).pid,
+		if (l2tp_nl_session_send(skb, NETLINK_CB(cb->skb).portid,
 					 cb->nlh->nlmsg_seq, NLM_F_MULTI,
 					 session) <= 0)
 			break;
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index 9730882697aa..ad39ef406851 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -563,13 +563,13 @@ flag_exist(const struct nlmsghdr *nlh)
 }
 
 static struct nlmsghdr *
-start_msg(struct sk_buff *skb, u32 pid, u32 seq, unsigned int flags,
+start_msg(struct sk_buff *skb, u32 portid, u32 seq, unsigned int flags,
 	  enum ipset_cmd cmd)
 {
 	struct nlmsghdr *nlh;
 	struct nfgenmsg *nfmsg;
 
-	nlh = nlmsg_put(skb, pid, seq, cmd | (NFNL_SUBSYS_IPSET << 8),
+	nlh = nlmsg_put(skb, portid, seq, cmd | (NFNL_SUBSYS_IPSET << 8),
 			sizeof(*nfmsg), flags);
 	if (nlh == NULL)
 		return NULL;
@@ -1045,7 +1045,7 @@ ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb)
 	ip_set_id_t index = IPSET_INVALID_ID, max;
 	struct ip_set *set = NULL;
 	struct nlmsghdr *nlh = NULL;
-	unsigned int flags = NETLINK_CB(cb->skb).pid ? NLM_F_MULTI : 0;
+	unsigned int flags = NETLINK_CB(cb->skb).portid ? NLM_F_MULTI : 0;
 	u32 dump_type, dump_flags;
 	int ret = 0;
 
@@ -1093,7 +1093,7 @@ dump_last:
 			pr_debug("reference set\n");
 			__ip_set_get(index);
 		}
-		nlh = start_msg(skb, NETLINK_CB(cb->skb).pid,
+		nlh = start_msg(skb, NETLINK_CB(cb->skb).portid,
 				cb->nlh->nlmsg_seq, flags,
 				IPSET_CMD_LIST);
 		if (!nlh) {
@@ -1226,7 +1226,7 @@ call_ad(struct sock *ctnl, struct sk_buff *skb, struct ip_set *set,
 		skb2 = nlmsg_new(payload, GFP_KERNEL);
 		if (skb2 == NULL)
 			return -ENOMEM;
-		rep = __nlmsg_put(skb2, NETLINK_CB(skb).pid,
+		rep = __nlmsg_put(skb2, NETLINK_CB(skb).portid,
 				  nlh->nlmsg_seq, NLMSG_ERROR, payload, 0);
 		errmsg = nlmsg_data(rep);
 		errmsg->error = ret;
@@ -1241,7 +1241,7 @@ call_ad(struct sock *ctnl, struct sk_buff *skb, struct ip_set *set,
 
 		*errline = lineno;
 
-		netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT);
+		netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT);
 		/* Signal netlink not to send its ACK/errmsg.  */
 		return -EINTR;
 	}
@@ -1416,7 +1416,7 @@ ip_set_header(struct sock *ctnl, struct sk_buff *skb,
 	if (skb2 == NULL)
 		return -ENOMEM;
 
-	nlh2 = start_msg(skb2, NETLINK_CB(skb).pid, nlh->nlmsg_seq, 0,
+	nlh2 = start_msg(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0,
 			 IPSET_CMD_HEADER);
 	if (!nlh2)
 		goto nlmsg_failure;
@@ -1428,7 +1428,7 @@ ip_set_header(struct sock *ctnl, struct sk_buff *skb,
 		goto nla_put_failure;
 	nlmsg_end(skb2, nlh2);
 
-	ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT);
+	ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT);
 	if (ret < 0)
 		return ret;
 
@@ -1476,7 +1476,7 @@ ip_set_type(struct sock *ctnl, struct sk_buff *skb,
 	if (skb2 == NULL)
 		return -ENOMEM;
 
-	nlh2 = start_msg(skb2, NETLINK_CB(skb).pid, nlh->nlmsg_seq, 0,
+	nlh2 = start_msg(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0,
 			 IPSET_CMD_TYPE);
 	if (!nlh2)
 		goto nlmsg_failure;
@@ -1489,7 +1489,7 @@ ip_set_type(struct sock *ctnl, struct sk_buff *skb,
 	nlmsg_end(skb2, nlh2);
 
 	pr_debug("Send TYPE, nlmsg_len: %u\n", nlh2->nlmsg_len);
-	ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT);
+	ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT);
 	if (ret < 0)
 		return ret;
 
@@ -1525,7 +1525,7 @@ ip_set_protocol(struct sock *ctnl, struct sk_buff *skb,
 	if (skb2 == NULL)
 		return -ENOMEM;
 
-	nlh2 = start_msg(skb2, NETLINK_CB(skb).pid, nlh->nlmsg_seq, 0,
+	nlh2 = start_msg(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0,
 			 IPSET_CMD_PROTOCOL);
 	if (!nlh2)
 		goto nlmsg_failure;
@@ -1533,7 +1533,7 @@ ip_set_protocol(struct sock *ctnl, struct sk_buff *skb,
 		goto nla_put_failure;
 	nlmsg_end(skb2, nlh2);
 
-	ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT);
+	ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT);
 	if (ret < 0)
 		return ret;
 
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 767cc12da0fe..0f924bf19c2b 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -2939,7 +2939,7 @@ static int ip_vs_genl_dump_service(struct sk_buff *skb,
 {
 	void *hdr;
 
-	hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
+	hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
 			  &ip_vs_genl_family, NLM_F_MULTI,
 			  IPVS_CMD_NEW_SERVICE);
 	if (!hdr)
@@ -3128,7 +3128,7 @@ static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest,
 {
 	void *hdr;
 
-	hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
+	hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
 			  &ip_vs_genl_family, NLM_F_MULTI,
 			  IPVS_CMD_NEW_DEST);
 	if (!hdr)
@@ -3257,7 +3257,7 @@ static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __be32 state,
 				  struct netlink_callback *cb)
 {
 	void *hdr;
-	hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
+	hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
 			  &ip_vs_genl_family, NLM_F_MULTI,
 			  IPVS_CMD_NEW_DAEMON);
 	if (!hdr)
diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c
index e7be79e640de..de9781b6464f 100644
--- a/net/netfilter/nf_conntrack_ecache.c
+++ b/net/netfilter/nf_conntrack_ecache.c
@@ -61,7 +61,7 @@ void nf_ct_deliver_cached_events(struct nf_conn *ct)
 		goto out_unlock;
 
 	item.ct = ct;
-	item.pid = 0;
+	item.portid = 0;
 	item.report = 0;
 
 	ret = notify->fcn(events | missed, &item);
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index a205bd6ce294..59770039b825 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -418,16 +418,16 @@ nla_put_failure:
 }
 
 static int
-ctnetlink_fill_info(struct sk_buff *skb, u32 pid, u32 seq, u32 type,
+ctnetlink_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
 		    struct nf_conn *ct)
 {
 	struct nlmsghdr *nlh;
 	struct nfgenmsg *nfmsg;
 	struct nlattr *nest_parms;
-	unsigned int flags = pid ? NLM_F_MULTI : 0, event;
+	unsigned int flags = portid ? NLM_F_MULTI : 0, event;
 
 	event = (NFNL_SUBSYS_CTNETLINK << 8 | IPCTNL_MSG_CT_NEW);
-	nlh = nlmsg_put(skb, pid, seq, event, sizeof(*nfmsg), flags);
+	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags);
 	if (nlh == NULL)
 		goto nlmsg_failure;
 
@@ -604,7 +604,7 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item)
 		goto errout;
 
 	type |= NFNL_SUBSYS_CTNETLINK << 8;
-	nlh = nlmsg_put(skb, item->pid, 0, type, sizeof(*nfmsg), flags);
+	nlh = nlmsg_put(skb, item->portid, 0, type, sizeof(*nfmsg), flags);
 	if (nlh == NULL)
 		goto nlmsg_failure;
 
@@ -680,7 +680,7 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item)
 	rcu_read_unlock();
 
 	nlmsg_end(skb, nlh);
-	err = nfnetlink_send(skb, net, item->pid, group, item->report,
+	err = nfnetlink_send(skb, net, item->portid, group, item->report,
 			     GFP_ATOMIC);
 	if (err == -ENOBUFS || err == -EAGAIN)
 		return -ENOBUFS;
@@ -757,7 +757,7 @@ restart:
 #endif
 			rcu_read_lock();
 			res =
-			ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).pid,
+			ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).portid,
 					    cb->nlh->nlmsg_seq,
 					    NFNL_MSG_TYPE(cb->nlh->nlmsg_type),
 					    ct);
@@ -961,7 +961,7 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb,
 	else {
 		/* Flush the whole table */
 		nf_conntrack_flush_report(net,
-					 NETLINK_CB(skb).pid,
+					 NETLINK_CB(skb).portid,
 					 nlmsg_report(nlh));
 		return 0;
 	}
@@ -985,7 +985,7 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb,
 
 	if (del_timer(&ct->timeout)) {
 		if (nf_conntrack_event_report(IPCT_DESTROY, ct,
-					      NETLINK_CB(skb).pid,
+					      NETLINK_CB(skb).portid,
 					      nlmsg_report(nlh)) < 0) {
 			nf_ct_delete_from_lists(ct);
 			/* we failed to report the event, try later */
@@ -1069,14 +1069,14 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb,
 	}
 
 	rcu_read_lock();
-	err = ctnetlink_fill_info(skb2, NETLINK_CB(skb).pid, nlh->nlmsg_seq,
+	err = ctnetlink_fill_info(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
 				  NFNL_MSG_TYPE(nlh->nlmsg_type), ct);
 	rcu_read_unlock();
 	nf_ct_put(ct);
 	if (err <= 0)
 		goto free;
 
-	err = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT);
+	err = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT);
 	if (err < 0)
 		goto out;
 
@@ -1616,7 +1616,7 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb,
 						      (1 << IPCT_PROTOINFO) |
 						      (1 << IPCT_NATSEQADJ) |
 						      (1 << IPCT_MARK) | events,
-						      ct, NETLINK_CB(skb).pid,
+						      ct, NETLINK_CB(skb).portid,
 						      nlmsg_report(nlh));
 			nf_ct_put(ct);
 		}
@@ -1638,7 +1638,7 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb,
 						      (1 << IPCT_PROTOINFO) |
 						      (1 << IPCT_NATSEQADJ) |
 						      (1 << IPCT_MARK),
-						      ct, NETLINK_CB(skb).pid,
+						      ct, NETLINK_CB(skb).portid,
 						      nlmsg_report(nlh));
 		}
 	}
@@ -1648,15 +1648,15 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb,
 }
 
 static int
-ctnetlink_ct_stat_cpu_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
+ctnetlink_ct_stat_cpu_fill_info(struct sk_buff *skb, u32 portid, u32 seq,
 				__u16 cpu, const struct ip_conntrack_stat *st)
 {
 	struct nlmsghdr *nlh;
 	struct nfgenmsg *nfmsg;
-	unsigned int flags = pid ? NLM_F_MULTI : 0, event;
+	unsigned int flags = portid ? NLM_F_MULTI : 0, event;
 
 	event = (NFNL_SUBSYS_CTNETLINK << 8 | IPCTNL_MSG_CT_GET_STATS_CPU);
-	nlh = nlmsg_put(skb, pid, seq, event, sizeof(*nfmsg), flags);
+	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags);
 	if (nlh == NULL)
 		goto nlmsg_failure;
 
@@ -1708,7 +1708,7 @@ ctnetlink_ct_stat_cpu_dump(struct sk_buff *skb, struct netlink_callback *cb)
 
 		st = per_cpu_ptr(net->ct.stat, cpu);
 		if (ctnetlink_ct_stat_cpu_fill_info(skb,
-						    NETLINK_CB(cb->skb).pid,
+						    NETLINK_CB(cb->skb).portid,
 						    cb->nlh->nlmsg_seq,
 						    cpu, st) < 0)
 				break;
@@ -1734,16 +1734,16 @@ ctnetlink_stat_ct_cpu(struct sock *ctnl, struct sk_buff *skb,
 }
 
 static int
-ctnetlink_stat_ct_fill_info(struct sk_buff *skb, u32 pid, u32 seq, u32 type,
+ctnetlink_stat_ct_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
 			    struct net *net)
 {
 	struct nlmsghdr *nlh;
 	struct nfgenmsg *nfmsg;
-	unsigned int flags = pid ? NLM_F_MULTI : 0, event;
+	unsigned int flags = portid ? NLM_F_MULTI : 0, event;
 	unsigned int nr_conntracks = atomic_read(&net->ct.count);
 
 	event = (NFNL_SUBSYS_CTNETLINK << 8 | IPCTNL_MSG_CT_GET_STATS);
-	nlh = nlmsg_put(skb, pid, seq, event, sizeof(*nfmsg), flags);
+	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags);
 	if (nlh == NULL)
 		goto nlmsg_failure;
 
@@ -1776,14 +1776,14 @@ ctnetlink_stat_ct(struct sock *ctnl, struct sk_buff *skb,
 	if (skb2 == NULL)
 		return -ENOMEM;
 
-	err = ctnetlink_stat_ct_fill_info(skb2, NETLINK_CB(skb).pid,
+	err = ctnetlink_stat_ct_fill_info(skb2, NETLINK_CB(skb).portid,
 					  nlh->nlmsg_seq,
 					  NFNL_MSG_TYPE(nlh->nlmsg_type),
 					  sock_net(skb->sk));
 	if (err <= 0)
 		goto free;
 
-	err = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT);
+	err = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT);
 	if (err < 0)
 		goto out;
 
@@ -2073,15 +2073,15 @@ nla_put_failure:
 }
 
 static int
-ctnetlink_exp_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
+ctnetlink_exp_fill_info(struct sk_buff *skb, u32 portid, u32 seq,
 			int event, const struct nf_conntrack_expect *exp)
 {
 	struct nlmsghdr *nlh;
 	struct nfgenmsg *nfmsg;
-	unsigned int flags = pid ? NLM_F_MULTI : 0;
+	unsigned int flags = portid ? NLM_F_MULTI : 0;
 
 	event |= NFNL_SUBSYS_CTNETLINK_EXP << 8;
-	nlh = nlmsg_put(skb, pid, seq, event, sizeof(*nfmsg), flags);
+	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags);
 	if (nlh == NULL)
 		goto nlmsg_failure;
 
@@ -2132,7 +2132,7 @@ ctnetlink_expect_event(unsigned int events, struct nf_exp_event *item)
 		goto errout;
 
 	type |= NFNL_SUBSYS_CTNETLINK_EXP << 8;
-	nlh = nlmsg_put(skb, item->pid, 0, type, sizeof(*nfmsg), flags);
+	nlh = nlmsg_put(skb, item->portid, 0, type, sizeof(*nfmsg), flags);
 	if (nlh == NULL)
 		goto nlmsg_failure;
 
@@ -2147,7 +2147,7 @@ ctnetlink_expect_event(unsigned int events, struct nf_exp_event *item)
 	rcu_read_unlock();
 
 	nlmsg_end(skb, nlh);
-	nfnetlink_send(skb, net, item->pid, group, item->report, GFP_ATOMIC);
+	nfnetlink_send(skb, net, item->portid, group, item->report, GFP_ATOMIC);
 	return 0;
 
 nla_put_failure:
@@ -2190,7 +2190,7 @@ restart:
 				cb->args[1] = 0;
 			}
 			if (ctnetlink_exp_fill_info(skb,
-						    NETLINK_CB(cb->skb).pid,
+						    NETLINK_CB(cb->skb).portid,
 						    cb->nlh->nlmsg_seq,
 						    IPCTNL_MSG_EXP_NEW,
 						    exp) < 0) {
@@ -2283,14 +2283,14 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb,
 	}
 
 	rcu_read_lock();
-	err = ctnetlink_exp_fill_info(skb2, NETLINK_CB(skb).pid,
+	err = ctnetlink_exp_fill_info(skb2, NETLINK_CB(skb).portid,
 				      nlh->nlmsg_seq, IPCTNL_MSG_EXP_NEW, exp);
 	rcu_read_unlock();
 	nf_ct_expect_put(exp);
 	if (err <= 0)
 		goto free;
 
-	err = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT);
+	err = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT);
 	if (err < 0)
 		goto out;
 
@@ -2344,7 +2344,7 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb,
 		/* after list removal, usage count == 1 */
 		spin_lock_bh(&nf_conntrack_lock);
 		if (del_timer(&exp->timeout)) {
-			nf_ct_unlink_expect_report(exp, NETLINK_CB(skb).pid,
+			nf_ct_unlink_expect_report(exp, NETLINK_CB(skb).portid,
 						   nlmsg_report(nlh));
 			nf_ct_expect_put(exp);
 		}
@@ -2366,7 +2366,7 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb,
 				if (!strcmp(m_help->helper->name, name) &&
 				    del_timer(&exp->timeout)) {
 					nf_ct_unlink_expect_report(exp,
-							NETLINK_CB(skb).pid,
+							NETLINK_CB(skb).portid,
 							nlmsg_report(nlh));
 					nf_ct_expect_put(exp);
 				}
@@ -2382,7 +2382,7 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb,
 						  hnode) {
 				if (del_timer(&exp->timeout)) {
 					nf_ct_unlink_expect_report(exp,
-							NETLINK_CB(skb).pid,
+							NETLINK_CB(skb).portid,
 							nlmsg_report(nlh));
 					nf_ct_expect_put(exp);
 				}
@@ -2447,7 +2447,7 @@ static int
 ctnetlink_create_expect(struct net *net, u16 zone,
 			const struct nlattr * const cda[],
 			u_int8_t u3,
-			u32 pid, int report)
+			u32 portid, int report)
 {
 	struct nf_conntrack_tuple tuple, mask, master_tuple;
 	struct nf_conntrack_tuple_hash *h = NULL;
@@ -2560,7 +2560,7 @@ ctnetlink_create_expect(struct net *net, u16 zone,
 		if (err < 0)
 			goto err_out;
 	}
-	err = nf_ct_expect_related_report(exp, pid, report);
+	err = nf_ct_expect_related_report(exp, portid, report);
 err_out:
 	nf_ct_expect_put(exp);
 out:
@@ -2603,7 +2603,7 @@ ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb,
 		if (nlh->nlmsg_flags & NLM_F_CREATE) {
 			err = ctnetlink_create_expect(net, zone, cda,
 						      u3,
-						      NETLINK_CB(skb).pid,
+						      NETLINK_CB(skb).portid,
 						      nlmsg_report(nlh));
 		}
 		return err;
@@ -2618,15 +2618,15 @@ ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb,
 }
 
 static int
-ctnetlink_exp_stat_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int cpu,
+ctnetlink_exp_stat_fill_info(struct sk_buff *skb, u32 portid, u32 seq, int cpu,
 			     const struct ip_conntrack_stat *st)
 {
 	struct nlmsghdr *nlh;
 	struct nfgenmsg *nfmsg;
-	unsigned int flags = pid ? NLM_F_MULTI : 0, event;
+	unsigned int flags = portid ? NLM_F_MULTI : 0, event;
 
 	event = (NFNL_SUBSYS_CTNETLINK << 8 | IPCTNL_MSG_EXP_GET_STATS_CPU);
-	nlh = nlmsg_put(skb, pid, seq, event, sizeof(*nfmsg), flags);
+	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags);
 	if (nlh == NULL)
 		goto nlmsg_failure;
 
@@ -2665,7 +2665,7 @@ ctnetlink_exp_stat_cpu_dump(struct sk_buff *skb, struct netlink_callback *cb)
 			continue;
 
 		st = per_cpu_ptr(net->ct.stat, cpu);
-		if (ctnetlink_exp_stat_fill_info(skb, NETLINK_CB(cb->skb).pid,
+		if (ctnetlink_exp_stat_fill_info(skb, NETLINK_CB(cb->skb).portid,
 						 cb->nlh->nlmsg_seq,
 						 cpu, st) < 0)
 			break;
diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c
index d7ec92879071..589d686f0b4c 100644
--- a/net/netfilter/nfnetlink_acct.c
+++ b/net/netfilter/nfnetlink_acct.c
@@ -91,16 +91,16 @@ nfnl_acct_new(struct sock *nfnl, struct sk_buff *skb,
 }
 
 static int
-nfnl_acct_fill_info(struct sk_buff *skb, u32 pid, u32 seq, u32 type,
+nfnl_acct_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
 		   int event, struct nf_acct *acct)
 {
 	struct nlmsghdr *nlh;
 	struct nfgenmsg *nfmsg;
-	unsigned int flags = pid ? NLM_F_MULTI : 0;
+	unsigned int flags = portid ? NLM_F_MULTI : 0;
 	u64 pkts, bytes;
 
 	event |= NFNL_SUBSYS_ACCT << 8;
-	nlh = nlmsg_put(skb, pid, seq, event, sizeof(*nfmsg), flags);
+	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags);
 	if (nlh == NULL)
 		goto nlmsg_failure;
 
@@ -150,7 +150,7 @@ nfnl_acct_dump(struct sk_buff *skb, struct netlink_callback *cb)
 		if (last && cur != last)
 			continue;
 
-		if (nfnl_acct_fill_info(skb, NETLINK_CB(cb->skb).pid,
+		if (nfnl_acct_fill_info(skb, NETLINK_CB(cb->skb).portid,
 				       cb->nlh->nlmsg_seq,
 				       NFNL_MSG_TYPE(cb->nlh->nlmsg_type),
 				       NFNL_MSG_ACCT_NEW, cur) < 0) {
@@ -195,7 +195,7 @@ nfnl_acct_get(struct sock *nfnl, struct sk_buff *skb,
 			break;
 		}
 
-		ret = nfnl_acct_fill_info(skb2, NETLINK_CB(skb).pid,
+		ret = nfnl_acct_fill_info(skb2, NETLINK_CB(skb).portid,
 					 nlh->nlmsg_seq,
 					 NFNL_MSG_TYPE(nlh->nlmsg_type),
 					 NFNL_MSG_ACCT_NEW, cur);
@@ -203,7 +203,7 @@ nfnl_acct_get(struct sock *nfnl, struct sk_buff *skb,
 			kfree_skb(skb2);
 			break;
 		}
-		ret = netlink_unicast(nfnl, skb2, NETLINK_CB(skb).pid,
+		ret = netlink_unicast(nfnl, skb2, NETLINK_CB(skb).portid,
 					MSG_DONTWAIT);
 		if (ret > 0)
 			ret = 0;
diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c
index 32a1ba3f3e27..3678073360a3 100644
--- a/net/netfilter/nfnetlink_cthelper.c
+++ b/net/netfilter/nfnetlink_cthelper.c
@@ -395,16 +395,16 @@ nla_put_failure:
 }
 
 static int
-nfnl_cthelper_fill_info(struct sk_buff *skb, u32 pid, u32 seq, u32 type,
+nfnl_cthelper_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
 			int event, struct nf_conntrack_helper *helper)
 {
 	struct nlmsghdr *nlh;
 	struct nfgenmsg *nfmsg;
-	unsigned int flags = pid ? NLM_F_MULTI : 0;
+	unsigned int flags = portid ? NLM_F_MULTI : 0;
 	int status;
 
 	event |= NFNL_SUBSYS_CTHELPER << 8;
-	nlh = nlmsg_put(skb, pid, seq, event, sizeof(*nfmsg), flags);
+	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags);
 	if (nlh == NULL)
 		goto nlmsg_failure;
 
@@ -468,7 +468,7 @@ restart:
 				cb->args[1] = 0;
 			}
 			if (nfnl_cthelper_fill_info(skb,
-					    NETLINK_CB(cb->skb).pid,
+					    NETLINK_CB(cb->skb).portid,
 					    cb->nlh->nlmsg_seq,
 					    NFNL_MSG_TYPE(cb->nlh->nlmsg_type),
 					    NFNL_MSG_CTHELPER_NEW, cur) < 0) {
@@ -538,7 +538,7 @@ nfnl_cthelper_get(struct sock *nfnl, struct sk_buff *skb,
 				break;
 			}
 
-			ret = nfnl_cthelper_fill_info(skb2, NETLINK_CB(skb).pid,
+			ret = nfnl_cthelper_fill_info(skb2, NETLINK_CB(skb).portid,
 						nlh->nlmsg_seq,
 						NFNL_MSG_TYPE(nlh->nlmsg_type),
 						NFNL_MSG_CTHELPER_NEW, cur);
@@ -547,7 +547,7 @@ nfnl_cthelper_get(struct sock *nfnl, struct sk_buff *skb,
 				break;
 			}
 
-			ret = netlink_unicast(nfnl, skb2, NETLINK_CB(skb).pid,
+			ret = netlink_unicast(nfnl, skb2, NETLINK_CB(skb).portid,
 						MSG_DONTWAIT);
 			if (ret > 0)
 				ret = 0;
diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c
index cdecbc8fe965..8847b4d8be06 100644
--- a/net/netfilter/nfnetlink_cttimeout.c
+++ b/net/netfilter/nfnetlink_cttimeout.c
@@ -155,16 +155,16 @@ err_proto_put:
 }
 
 static int
-ctnl_timeout_fill_info(struct sk_buff *skb, u32 pid, u32 seq, u32 type,
+ctnl_timeout_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
 		       int event, struct ctnl_timeout *timeout)
 {
 	struct nlmsghdr *nlh;
 	struct nfgenmsg *nfmsg;
-	unsigned int flags = pid ? NLM_F_MULTI : 0;
+	unsigned int flags = portid ? NLM_F_MULTI : 0;
 	struct nf_conntrack_l4proto *l4proto = timeout->l4proto;
 
 	event |= NFNL_SUBSYS_CTNETLINK_TIMEOUT << 8;
-	nlh = nlmsg_put(skb, pid, seq, event, sizeof(*nfmsg), flags);
+	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags);
 	if (nlh == NULL)
 		goto nlmsg_failure;
 
@@ -222,7 +222,7 @@ ctnl_timeout_dump(struct sk_buff *skb, struct netlink_callback *cb)
 		if (last && cur != last)
 			continue;
 
-		if (ctnl_timeout_fill_info(skb, NETLINK_CB(cb->skb).pid,
+		if (ctnl_timeout_fill_info(skb, NETLINK_CB(cb->skb).portid,
 					   cb->nlh->nlmsg_seq,
 					   NFNL_MSG_TYPE(cb->nlh->nlmsg_type),
 					   IPCTNL_MSG_TIMEOUT_NEW, cur) < 0) {
@@ -268,7 +268,7 @@ cttimeout_get_timeout(struct sock *ctnl, struct sk_buff *skb,
 			break;
 		}
 
-		ret = ctnl_timeout_fill_info(skb2, NETLINK_CB(skb).pid,
+		ret = ctnl_timeout_fill_info(skb2, NETLINK_CB(skb).portid,
 					     nlh->nlmsg_seq,
 					     NFNL_MSG_TYPE(nlh->nlmsg_type),
 					     IPCTNL_MSG_TIMEOUT_NEW, cur);
@@ -276,7 +276,7 @@ cttimeout_get_timeout(struct sock *ctnl, struct sk_buff *skb,
 			kfree_skb(skb2);
 			break;
 		}
-		ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid,
+		ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid,
 					MSG_DONTWAIT);
 		if (ret > 0)
 			ret = 0;
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index be194b144297..8cb67c4dbd62 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -56,7 +56,7 @@ struct nfulnl_instance {
 	struct sk_buff *skb;		/* pre-allocatd skb */
 	struct timer_list timer;
 	struct user_namespace *peer_user_ns;	/* User namespace of the peer process */
-	int peer_pid;			/* PID of the peer process */
+	int peer_portid;			/* PORTID of the peer process */
 
 	/* configurable parameters */
 	unsigned int flushtimeout;	/* timeout until queue flush */
@@ -133,7 +133,7 @@ instance_put(struct nfulnl_instance *inst)
 static void nfulnl_timer(unsigned long data);
 
 static struct nfulnl_instance *
-instance_create(u_int16_t group_num, int pid, struct user_namespace *user_ns)
+instance_create(u_int16_t group_num, int portid, struct user_namespace *user_ns)
 {
 	struct nfulnl_instance *inst;
 	int err;
@@ -164,7 +164,7 @@ instance_create(u_int16_t group_num, int pid, struct user_namespace *user_ns)
 	setup_timer(&inst->timer, nfulnl_timer, (unsigned long)inst);
 
 	inst->peer_user_ns = user_ns;
-	inst->peer_pid = pid;
+	inst->peer_portid = portid;
 	inst->group_num = group_num;
 
 	inst->qthreshold 	= NFULNL_QTHRESH_DEFAULT;
@@ -336,7 +336,7 @@ __nfulnl_send(struct nfulnl_instance *inst)
 		if (!nlh)
 			goto out;
 	}
-	status = nfnetlink_unicast(inst->skb, &init_net, inst->peer_pid,
+	status = nfnetlink_unicast(inst->skb, &init_net, inst->peer_portid,
 				   MSG_DONTWAIT);
 
 	inst->qlen = 0;
@@ -703,7 +703,7 @@ nfulnl_rcv_nl_event(struct notifier_block *this,
 	if (event == NETLINK_URELEASE && n->protocol == NETLINK_NETFILTER) {
 		int i;
 
-		/* destroy all instances for this pid */
+		/* destroy all instances for this portid */
 		spin_lock_bh(&instances_lock);
 		for  (i = 0; i < INSTANCE_BUCKETS; i++) {
 			struct hlist_node *tmp, *t2;
@@ -712,7 +712,7 @@ nfulnl_rcv_nl_event(struct notifier_block *this,
 
 			hlist_for_each_entry_safe(inst, tmp, t2, head, hlist) {
 				if ((net_eq(n->net, &init_net)) &&
-				    (n->pid == inst->peer_pid))
+				    (n->portid == inst->peer_portid))
 					__instance_destroy(inst);
 			}
 		}
@@ -774,7 +774,7 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
 	}
 
 	inst = instance_lookup_get(group_num);
-	if (inst && inst->peer_pid != NETLINK_CB(skb).pid) {
+	if (inst && inst->peer_portid != NETLINK_CB(skb).portid) {
 		ret = -EPERM;
 		goto out_put;
 	}
@@ -788,7 +788,7 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
 			}
 
 			inst = instance_create(group_num,
-					       NETLINK_CB(skb).pid,
+					       NETLINK_CB(skb).portid,
 					       sk_user_ns(NETLINK_CB(skb).ssk));
 			if (IS_ERR(inst)) {
 				ret = PTR_ERR(inst);
@@ -947,7 +947,7 @@ static int seq_show(struct seq_file *s, void *v)
 
 	return seq_printf(s, "%5d %6d %5d %1d %5d %6d %2d\n",
 			  inst->group_num,
-			  inst->peer_pid, inst->qlen,
+			  inst->peer_portid, inst->qlen,
 			  inst->copy_mode, inst->copy_range,
 			  inst->flushtimeout, atomic_read(&inst->use));
 }
diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c
index c0496a55ad0c..b8d9995b76a8 100644
--- a/net/netfilter/nfnetlink_queue_core.c
+++ b/net/netfilter/nfnetlink_queue_core.c
@@ -44,7 +44,7 @@ struct nfqnl_instance {
 	struct hlist_node hlist;		/* global list of queues */
 	struct rcu_head rcu;
 
-	int peer_pid;
+	int peer_portid;
 	unsigned int queue_maxlen;
 	unsigned int copy_range;
 	unsigned int queue_dropped;
@@ -92,7 +92,7 @@ instance_lookup(u_int16_t queue_num)
 }
 
 static struct nfqnl_instance *
-instance_create(u_int16_t queue_num, int pid)
+instance_create(u_int16_t queue_num, int portid)
 {
 	struct nfqnl_instance *inst;
 	unsigned int h;
@@ -111,7 +111,7 @@ instance_create(u_int16_t queue_num, int pid)
 	}
 
 	inst->queue_num = queue_num;
-	inst->peer_pid = pid;
+	inst->peer_portid = portid;
 	inst->queue_maxlen = NFQNL_QMAX_DEFAULT;
 	inst->copy_range = 0xfffff;
 	inst->copy_mode = NFQNL_COPY_NONE;
@@ -440,7 +440,7 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
 	}
 	spin_lock_bh(&queue->lock);
 
-	if (!queue->peer_pid) {
+	if (!queue->peer_portid) {
 		err = -EINVAL;
 		goto err_out_free_nskb;
 	}
@@ -459,7 +459,7 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
 	*packet_id_ptr = htonl(entry->id);
 
 	/* nfnetlink_unicast will either free the nskb or add it to a socket */
-	err = nfnetlink_unicast(nskb, &init_net, queue->peer_pid, MSG_DONTWAIT);
+	err = nfnetlink_unicast(nskb, &init_net, queue->peer_portid, MSG_DONTWAIT);
 	if (err < 0) {
 		queue->queue_user_dropped++;
 		goto err_out_unlock;
@@ -616,7 +616,7 @@ nfqnl_rcv_nl_event(struct notifier_block *this,
 	if (event == NETLINK_URELEASE && n->protocol == NETLINK_NETFILTER) {
 		int i;
 
-		/* destroy all instances for this pid */
+		/* destroy all instances for this portid */
 		spin_lock(&instances_lock);
 		for (i = 0; i < INSTANCE_BUCKETS; i++) {
 			struct hlist_node *tmp, *t2;
@@ -625,7 +625,7 @@ nfqnl_rcv_nl_event(struct notifier_block *this,
 
 			hlist_for_each_entry_safe(inst, tmp, t2, head, hlist) {
 				if ((n->net == &init_net) &&
-				    (n->pid == inst->peer_pid))
+				    (n->portid == inst->peer_portid))
 					__instance_destroy(inst);
 			}
 		}
@@ -650,7 +650,7 @@ static const struct nla_policy nfqa_verdict_batch_policy[NFQA_MAX+1] = {
 	[NFQA_MARK]		= { .type = NLA_U32 },
 };
 
-static struct nfqnl_instance *verdict_instance_lookup(u16 queue_num, int nlpid)
+static struct nfqnl_instance *verdict_instance_lookup(u16 queue_num, int nlportid)
 {
 	struct nfqnl_instance *queue;
 
@@ -658,7 +658,7 @@ static struct nfqnl_instance *verdict_instance_lookup(u16 queue_num, int nlpid)
 	if (!queue)
 		return ERR_PTR(-ENODEV);
 
-	if (queue->peer_pid != nlpid)
+	if (queue->peer_portid != nlportid)
 		return ERR_PTR(-EPERM);
 
 	return queue;
@@ -698,7 +698,7 @@ nfqnl_recv_verdict_batch(struct sock *ctnl, struct sk_buff *skb,
 	LIST_HEAD(batch_list);
 	u16 queue_num = ntohs(nfmsg->res_id);
 
-	queue = verdict_instance_lookup(queue_num, NETLINK_CB(skb).pid);
+	queue = verdict_instance_lookup(queue_num, NETLINK_CB(skb).portid);
 	if (IS_ERR(queue))
 		return PTR_ERR(queue);
 
@@ -749,7 +749,7 @@ nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb,
 	queue = instance_lookup(queue_num);
 	if (!queue)
 
-	queue = verdict_instance_lookup(queue_num, NETLINK_CB(skb).pid);
+	queue = verdict_instance_lookup(queue_num, NETLINK_CB(skb).portid);
 	if (IS_ERR(queue))
 		return PTR_ERR(queue);
 
@@ -832,7 +832,7 @@ nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
 
 	rcu_read_lock();
 	queue = instance_lookup(queue_num);
-	if (queue && queue->peer_pid != NETLINK_CB(skb).pid) {
+	if (queue && queue->peer_portid != NETLINK_CB(skb).portid) {
 		ret = -EPERM;
 		goto err_out_unlock;
 	}
@@ -844,7 +844,7 @@ nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
 				ret = -EBUSY;
 				goto err_out_unlock;
 			}
-			queue = instance_create(queue_num, NETLINK_CB(skb).pid);
+			queue = instance_create(queue_num, NETLINK_CB(skb).portid);
 			if (IS_ERR(queue)) {
 				ret = PTR_ERR(queue);
 				goto err_out_unlock;
@@ -1016,7 +1016,7 @@ static int seq_show(struct seq_file *s, void *v)
 
 	return seq_printf(s, "%5d %6d %5d %1d %5d %5d %5d %8d %2d\n",
 			  inst->queue_num,
-			  inst->peer_pid, inst->queue_total,
+			  inst->peer_portid, inst->queue_total,
 			  inst->copy_mode, inst->copy_range,
 			  inst->queue_dropped, inst->queue_user_dropped,
 			  inst->id_sequence, 1);
diff --git a/net/netlabel/netlabel_cipso_v4.c b/net/netlabel/netlabel_cipso_v4.c
index 6bf878335d94..c15042f987bd 100644
--- a/net/netlabel/netlabel_cipso_v4.c
+++ b/net/netlabel/netlabel_cipso_v4.c
@@ -627,7 +627,7 @@ static int netlbl_cipsov4_listall_cb(struct cipso_v4_doi *doi_def, void *arg)
 	struct netlbl_cipsov4_doiwalk_arg *cb_arg = arg;
 	void *data;
 
-	data = genlmsg_put(cb_arg->skb, NETLINK_CB(cb_arg->nl_cb->skb).pid,
+	data = genlmsg_put(cb_arg->skb, NETLINK_CB(cb_arg->nl_cb->skb).portid,
 			   cb_arg->seq, &netlbl_cipsov4_gnl_family,
 			   NLM_F_MULTI, NLBL_CIPSOV4_C_LISTALL);
 	if (data == NULL)
diff --git a/net/netlabel/netlabel_mgmt.c b/net/netlabel/netlabel_mgmt.c
index 4809e2e48b02..c5384ffc6146 100644
--- a/net/netlabel/netlabel_mgmt.c
+++ b/net/netlabel/netlabel_mgmt.c
@@ -448,7 +448,7 @@ static int netlbl_mgmt_listall_cb(struct netlbl_dom_map *entry, void *arg)
 	struct netlbl_domhsh_walk_arg *cb_arg = arg;
 	void *data;
 
-	data = genlmsg_put(cb_arg->skb, NETLINK_CB(cb_arg->nl_cb->skb).pid,
+	data = genlmsg_put(cb_arg->skb, NETLINK_CB(cb_arg->nl_cb->skb).portid,
 			   cb_arg->seq, &netlbl_mgmt_gnl_family,
 			   NLM_F_MULTI, NLBL_MGMT_C_LISTALL);
 	if (data == NULL)
@@ -613,7 +613,7 @@ static int netlbl_mgmt_protocols_cb(struct sk_buff *skb,
 	int ret_val = -ENOMEM;
 	void *data;
 
-	data = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
+	data = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
 			   &netlbl_mgmt_gnl_family, NLM_F_MULTI,
 			   NLBL_MGMT_C_PROTOCOLS);
 	if (data == NULL)
diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c
index e7ff694f1049..b7944413b404 100644
--- a/net/netlabel/netlabel_unlabeled.c
+++ b/net/netlabel/netlabel_unlabeled.c
@@ -1096,7 +1096,7 @@ static int netlbl_unlabel_staticlist_gen(u32 cmd,
 	char *secctx;
 	u32 secctx_len;
 
-	data = genlmsg_put(cb_arg->skb, NETLINK_CB(cb_arg->nl_cb->skb).pid,
+	data = genlmsg_put(cb_arg->skb, NETLINK_CB(cb_arg->nl_cb->skb).portid,
 			   cb_arg->seq, &netlbl_unlabel_gnl_family,
 			   NLM_F_MULTI, cmd);
 	if (data == NULL)
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 4d348e97e131..0f2e3ad69c47 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -67,8 +67,8 @@
 struct netlink_sock {
 	/* struct sock has to be the first member of netlink_sock */
 	struct sock		sk;
-	u32			pid;
-	u32			dst_pid;
+	u32			portid;
+	u32			dst_portid;
 	u32			dst_group;
 	u32			flags;
 	u32			subscriptions;
@@ -104,7 +104,7 @@ static inline int netlink_is_kernel(struct sock *sk)
 	return nlk_sk(sk)->flags & NETLINK_KERNEL_SOCKET;
 }
 
-struct nl_pid_hash {
+struct nl_portid_hash {
 	struct hlist_head	*table;
 	unsigned long		rehash_time;
 
@@ -118,7 +118,7 @@ struct nl_pid_hash {
 };
 
 struct netlink_table {
-	struct nl_pid_hash	hash;
+	struct nl_portid_hash	hash;
 	struct hlist_head	mc_list;
 	struct listeners __rcu	*listeners;
 	unsigned int		flags;
@@ -145,9 +145,9 @@ static inline u32 netlink_group_mask(u32 group)
 	return group ? 1 << (group - 1) : 0;
 }
 
-static inline struct hlist_head *nl_pid_hashfn(struct nl_pid_hash *hash, u32 pid)
+static inline struct hlist_head *nl_portid_hashfn(struct nl_portid_hash *hash, u32 portid)
 {
-	return &hash->table[jhash_1word(pid, hash->rnd) & hash->mask];
+	return &hash->table[jhash_1word(portid, hash->rnd) & hash->mask];
 }
 
 static void netlink_destroy_callback(struct netlink_callback *cb)
@@ -239,17 +239,17 @@ netlink_unlock_table(void)
 		wake_up(&nl_table_wait);
 }
 
-static struct sock *netlink_lookup(struct net *net, int protocol, u32 pid)
+static struct sock *netlink_lookup(struct net *net, int protocol, u32 portid)
 {
-	struct nl_pid_hash *hash = &nl_table[protocol].hash;
+	struct nl_portid_hash *hash = &nl_table[protocol].hash;
 	struct hlist_head *head;
 	struct sock *sk;
 	struct hlist_node *node;
 
 	read_lock(&nl_table_lock);
-	head = nl_pid_hashfn(hash, pid);
+	head = nl_portid_hashfn(hash, portid);
 	sk_for_each(sk, node, head) {
-		if (net_eq(sock_net(sk), net) && (nlk_sk(sk)->pid == pid)) {
+		if (net_eq(sock_net(sk), net) && (nlk_sk(sk)->portid == portid)) {
 			sock_hold(sk);
 			goto found;
 		}
@@ -260,7 +260,7 @@ found:
 	return sk;
 }
 
-static struct hlist_head *nl_pid_hash_zalloc(size_t size)
+static struct hlist_head *nl_portid_hash_zalloc(size_t size)
 {
 	if (size <= PAGE_SIZE)
 		return kzalloc(size, GFP_ATOMIC);
@@ -270,7 +270,7 @@ static struct hlist_head *nl_pid_hash_zalloc(size_t size)
 					 get_order(size));
 }
 
-static void nl_pid_hash_free(struct hlist_head *table, size_t size)
+static void nl_portid_hash_free(struct hlist_head *table, size_t size)
 {
 	if (size <= PAGE_SIZE)
 		kfree(table);
@@ -278,7 +278,7 @@ static void nl_pid_hash_free(struct hlist_head *table, size_t size)
 		free_pages((unsigned long)table, get_order(size));
 }
 
-static int nl_pid_hash_rehash(struct nl_pid_hash *hash, int grow)
+static int nl_portid_hash_rehash(struct nl_portid_hash *hash, int grow)
 {
 	unsigned int omask, mask, shift;
 	size_t osize, size;
@@ -296,7 +296,7 @@ static int nl_pid_hash_rehash(struct nl_pid_hash *hash, int grow)
 		size *= 2;
 	}
 
-	table = nl_pid_hash_zalloc(size);
+	table = nl_portid_hash_zalloc(size);
 	if (!table)
 		return 0;
 
@@ -311,23 +311,23 @@ static int nl_pid_hash_rehash(struct nl_pid_hash *hash, int grow)
 		struct hlist_node *node, *tmp;
 
 		sk_for_each_safe(sk, node, tmp, &otable[i])
-			__sk_add_node(sk, nl_pid_hashfn(hash, nlk_sk(sk)->pid));
+			__sk_add_node(sk, nl_portid_hashfn(hash, nlk_sk(sk)->portid));
 	}
 
-	nl_pid_hash_free(otable, osize);
+	nl_portid_hash_free(otable, osize);
 	hash->rehash_time = jiffies + 10 * 60 * HZ;
 	return 1;
 }
 
-static inline int nl_pid_hash_dilute(struct nl_pid_hash *hash, int len)
+static inline int nl_portid_hash_dilute(struct nl_portid_hash *hash, int len)
 {
 	int avg = hash->entries >> hash->shift;
 
-	if (unlikely(avg > 1) && nl_pid_hash_rehash(hash, 1))
+	if (unlikely(avg > 1) && nl_portid_hash_rehash(hash, 1))
 		return 1;
 
 	if (unlikely(len > avg) && time_after(jiffies, hash->rehash_time)) {
-		nl_pid_hash_rehash(hash, 0);
+		nl_portid_hash_rehash(hash, 0);
 		return 1;
 	}
 
@@ -356,9 +356,9 @@ netlink_update_listeners(struct sock *sk)
 	 * makes sure updates are visible before bind or setsockopt return. */
 }
 
-static int netlink_insert(struct sock *sk, struct net *net, u32 pid)
+static int netlink_insert(struct sock *sk, struct net *net, u32 portid)
 {
-	struct nl_pid_hash *hash = &nl_table[sk->sk_protocol].hash;
+	struct nl_portid_hash *hash = &nl_table[sk->sk_protocol].hash;
 	struct hlist_head *head;
 	int err = -EADDRINUSE;
 	struct sock *osk;
@@ -366,10 +366,10 @@ static int netlink_insert(struct sock *sk, struct net *net, u32 pid)
 	int len;
 
 	netlink_table_grab();
-	head = nl_pid_hashfn(hash, pid);
+	head = nl_portid_hashfn(hash, portid);
 	len = 0;
 	sk_for_each(osk, node, head) {
-		if (net_eq(sock_net(osk), net) && (nlk_sk(osk)->pid == pid))
+		if (net_eq(sock_net(osk), net) && (nlk_sk(osk)->portid == portid))
 			break;
 		len++;
 	}
@@ -377,17 +377,17 @@ static int netlink_insert(struct sock *sk, struct net *net, u32 pid)
 		goto err;
 
 	err = -EBUSY;
-	if (nlk_sk(sk)->pid)
+	if (nlk_sk(sk)->portid)
 		goto err;
 
 	err = -ENOMEM;
 	if (BITS_PER_LONG > 32 && unlikely(hash->entries >= UINT_MAX))
 		goto err;
 
-	if (len && nl_pid_hash_dilute(hash, len))
-		head = nl_pid_hashfn(hash, pid);
+	if (len && nl_portid_hash_dilute(hash, len))
+		head = nl_portid_hashfn(hash, portid);
 	hash->entries++;
-	nlk_sk(sk)->pid = pid;
+	nlk_sk(sk)->portid = portid;
 	sk_add_node(sk, head);
 	err = 0;
 
@@ -518,11 +518,11 @@ static int netlink_release(struct socket *sock)
 
 	skb_queue_purge(&sk->sk_write_queue);
 
-	if (nlk->pid) {
+	if (nlk->portid) {
 		struct netlink_notify n = {
 						.net = sock_net(sk),
 						.protocol = sk->sk_protocol,
-						.pid = nlk->pid,
+						.portid = nlk->portid,
 					  };
 		atomic_notifier_call_chain(&netlink_chain,
 				NETLINK_URELEASE, &n);
@@ -559,24 +559,24 @@ static int netlink_autobind(struct socket *sock)
 {
 	struct sock *sk = sock->sk;
 	struct net *net = sock_net(sk);
-	struct nl_pid_hash *hash = &nl_table[sk->sk_protocol].hash;
+	struct nl_portid_hash *hash = &nl_table[sk->sk_protocol].hash;
 	struct hlist_head *head;
 	struct sock *osk;
 	struct hlist_node *node;
-	s32 pid = task_tgid_vnr(current);
+	s32 portid = task_tgid_vnr(current);
 	int err;
 	static s32 rover = -4097;
 
 retry:
 	cond_resched();
 	netlink_table_grab();
-	head = nl_pid_hashfn(hash, pid);
+	head = nl_portid_hashfn(hash, portid);
 	sk_for_each(osk, node, head) {
 		if (!net_eq(sock_net(osk), net))
 			continue;
-		if (nlk_sk(osk)->pid == pid) {
-			/* Bind collision, search negative pid values. */
-			pid = rover--;
+		if (nlk_sk(osk)->portid == portid) {
+			/* Bind collision, search negative portid values. */
+			portid = rover--;
 			if (rover > -4097)
 				rover = -4097;
 			netlink_table_ungrab();
@@ -585,7 +585,7 @@ retry:
 	}
 	netlink_table_ungrab();
 
-	err = netlink_insert(sk, net, pid);
+	err = netlink_insert(sk, net, portid);
 	if (err == -EADDRINUSE)
 		goto retry;
 
@@ -668,8 +668,8 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr,
 			return err;
 	}
 
-	if (nlk->pid) {
-		if (nladdr->nl_pid != nlk->pid)
+	if (nlk->portid) {
+		if (nladdr->nl_pid != nlk->portid)
 			return -EINVAL;
 	} else {
 		err = nladdr->nl_pid ?
@@ -715,7 +715,7 @@ static int netlink_connect(struct socket *sock, struct sockaddr *addr,
 
 	if (addr->sa_family == AF_UNSPEC) {
 		sk->sk_state	= NETLINK_UNCONNECTED;
-		nlk->dst_pid	= 0;
+		nlk->dst_portid	= 0;
 		nlk->dst_group  = 0;
 		return 0;
 	}
@@ -726,12 +726,12 @@ static int netlink_connect(struct socket *sock, struct sockaddr *addr,
 	if (nladdr->nl_groups && !netlink_capable(sock, NL_CFG_F_NONROOT_SEND))
 		return -EPERM;
 
-	if (!nlk->pid)
+	if (!nlk->portid)
 		err = netlink_autobind(sock);
 
 	if (err == 0) {
 		sk->sk_state	= NETLINK_CONNECTED;
-		nlk->dst_pid 	= nladdr->nl_pid;
+		nlk->dst_portid = nladdr->nl_pid;
 		nlk->dst_group  = ffs(nladdr->nl_groups);
 	}
 
@@ -750,10 +750,10 @@ static int netlink_getname(struct socket *sock, struct sockaddr *addr,
 	*addr_len = sizeof(*nladdr);
 
 	if (peer) {
-		nladdr->nl_pid = nlk->dst_pid;
+		nladdr->nl_pid = nlk->dst_portid;
 		nladdr->nl_groups = netlink_group_mask(nlk->dst_group);
 	} else {
-		nladdr->nl_pid = nlk->pid;
+		nladdr->nl_pid = nlk->portid;
 		nladdr->nl_groups = nlk->groups ? nlk->groups[0] : 0;
 	}
 	return 0;
@@ -772,19 +772,19 @@ static void netlink_overrun(struct sock *sk)
 	atomic_inc(&sk->sk_drops);
 }
 
-static struct sock *netlink_getsockbypid(struct sock *ssk, u32 pid)
+static struct sock *netlink_getsockbyportid(struct sock *ssk, u32 portid)
 {
 	struct sock *sock;
 	struct netlink_sock *nlk;
 
-	sock = netlink_lookup(sock_net(ssk), ssk->sk_protocol, pid);
+	sock = netlink_lookup(sock_net(ssk), ssk->sk_protocol, portid);
 	if (!sock)
 		return ERR_PTR(-ECONNREFUSED);
 
 	/* Don't bother queuing skb if kernel socket has no input function */
 	nlk = nlk_sk(sock);
 	if (sock->sk_state == NETLINK_CONNECTED &&
-	    nlk->dst_pid != nlk_sk(ssk)->pid) {
+	    nlk->dst_portid != nlk_sk(ssk)->portid) {
 		sock_put(sock);
 		return ERR_PTR(-ECONNREFUSED);
 	}
@@ -935,7 +935,7 @@ static int netlink_unicast_kernel(struct sock *sk, struct sk_buff *skb,
 }
 
 int netlink_unicast(struct sock *ssk, struct sk_buff *skb,
-		    u32 pid, int nonblock)
+		    u32 portid, int nonblock)
 {
 	struct sock *sk;
 	int err;
@@ -945,7 +945,7 @@ int netlink_unicast(struct sock *ssk, struct sk_buff *skb,
 
 	timeo = sock_sndtimeo(ssk, nonblock);
 retry:
-	sk = netlink_getsockbypid(ssk, pid);
+	sk = netlink_getsockbyportid(ssk, portid);
 	if (IS_ERR(sk)) {
 		kfree_skb(skb);
 		return PTR_ERR(sk);
@@ -1005,7 +1005,7 @@ static int netlink_broadcast_deliver(struct sock *sk, struct sk_buff *skb)
 struct netlink_broadcast_data {
 	struct sock *exclude_sk;
 	struct net *net;
-	u32 pid;
+	u32 portid;
 	u32 group;
 	int failure;
 	int delivery_failure;
@@ -1026,7 +1026,7 @@ static int do_one_broadcast(struct sock *sk,
 	if (p->exclude_sk == sk)
 		goto out;
 
-	if (nlk->pid == p->pid || p->group - 1 >= nlk->ngroups ||
+	if (nlk->portid == p->portid || p->group - 1 >= nlk->ngroups ||
 	    !test_bit(p->group - 1, nlk->groups))
 		goto out;
 
@@ -1078,7 +1078,7 @@ out:
 	return 0;
 }
 
-int netlink_broadcast_filtered(struct sock *ssk, struct sk_buff *skb, u32 pid,
+int netlink_broadcast_filtered(struct sock *ssk, struct sk_buff *skb, u32 portid,
 	u32 group, gfp_t allocation,
 	int (*filter)(struct sock *dsk, struct sk_buff *skb, void *data),
 	void *filter_data)
@@ -1092,7 +1092,7 @@ int netlink_broadcast_filtered(struct sock *ssk, struct sk_buff *skb, u32 pid,
 
 	info.exclude_sk = ssk;
 	info.net = net;
-	info.pid = pid;
+	info.portid = portid;
 	info.group = group;
 	info.failure = 0;
 	info.delivery_failure = 0;
@@ -1130,17 +1130,17 @@ int netlink_broadcast_filtered(struct sock *ssk, struct sk_buff *skb, u32 pid,
 }
 EXPORT_SYMBOL(netlink_broadcast_filtered);
 
-int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 pid,
+int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 portid,
 		      u32 group, gfp_t allocation)
 {
-	return netlink_broadcast_filtered(ssk, skb, pid, group, allocation,
+	return netlink_broadcast_filtered(ssk, skb, portid, group, allocation,
 		NULL, NULL);
 }
 EXPORT_SYMBOL(netlink_broadcast);
 
 struct netlink_set_err_data {
 	struct sock *exclude_sk;
-	u32 pid;
+	u32 portid;
 	u32 group;
 	int code;
 };
@@ -1156,7 +1156,7 @@ static int do_one_set_err(struct sock *sk, struct netlink_set_err_data *p)
 	if (!net_eq(sock_net(sk), sock_net(p->exclude_sk)))
 		goto out;
 
-	if (nlk->pid == p->pid || p->group - 1 >= nlk->ngroups ||
+	if (nlk->portid == p->portid || p->group - 1 >= nlk->ngroups ||
 	    !test_bit(p->group - 1, nlk->groups))
 		goto out;
 
@@ -1174,14 +1174,14 @@ out:
 /**
  * netlink_set_err - report error to broadcast listeners
  * @ssk: the kernel netlink socket, as returned by netlink_kernel_create()
- * @pid: the PID of a process that we want to skip (if any)
+ * @portid: the PORTID of a process that we want to skip (if any)
  * @groups: the broadcast group that will notice the error
  * @code: error code, must be negative (as usual in kernelspace)
  *
  * This function returns the number of broadcast listeners that have set the
  * NETLINK_RECV_NO_ENOBUFS socket option.
  */
-int netlink_set_err(struct sock *ssk, u32 pid, u32 group, int code)
+int netlink_set_err(struct sock *ssk, u32 portid, u32 group, int code)
 {
 	struct netlink_set_err_data info;
 	struct hlist_node *node;
@@ -1189,7 +1189,7 @@ int netlink_set_err(struct sock *ssk, u32 pid, u32 group, int code)
 	int ret = 0;
 
 	info.exclude_sk = ssk;
-	info.pid = pid;
+	info.portid = portid;
 	info.group = group;
 	/* sk->sk_err wants a positive error value */
 	info.code = -code;
@@ -1354,7 +1354,7 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock,
 	struct sock *sk = sock->sk;
 	struct netlink_sock *nlk = nlk_sk(sk);
 	struct sockaddr_nl *addr = msg->msg_name;
-	u32 dst_pid;
+	u32 dst_portid;
 	u32 dst_group;
 	struct sk_buff *skb;
 	int err;
@@ -1374,18 +1374,18 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock,
 		err = -EINVAL;
 		if (addr->nl_family != AF_NETLINK)
 			goto out;
-		dst_pid = addr->nl_pid;
+		dst_portid = addr->nl_pid;
 		dst_group = ffs(addr->nl_groups);
 		err =  -EPERM;
-		if ((dst_group || dst_pid) &&
+		if ((dst_group || dst_portid) &&
 		    !netlink_capable(sock, NL_CFG_F_NONROOT_SEND))
 			goto out;
 	} else {
-		dst_pid = nlk->dst_pid;
+		dst_portid = nlk->dst_portid;
 		dst_group = nlk->dst_group;
 	}
 
-	if (!nlk->pid) {
+	if (!nlk->portid) {
 		err = netlink_autobind(sock);
 		if (err)
 			goto out;
@@ -1399,7 +1399,7 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock,
 	if (skb == NULL)
 		goto out;
 
-	NETLINK_CB(skb).pid	= nlk->pid;
+	NETLINK_CB(skb).portid	= nlk->portid;
 	NETLINK_CB(skb).dst_group = dst_group;
 	NETLINK_CB(skb).creds	= siocb->scm->creds;
 
@@ -1417,9 +1417,9 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock,
 
 	if (dst_group) {
 		atomic_inc(&skb->users);
-		netlink_broadcast(sk, skb, dst_pid, dst_group, GFP_KERNEL);
+		netlink_broadcast(sk, skb, dst_portid, dst_group, GFP_KERNEL);
 	}
-	err = netlink_unicast(sk, skb, dst_pid, msg->msg_flags&MSG_DONTWAIT);
+	err = netlink_unicast(sk, skb, dst_portid, msg->msg_flags&MSG_DONTWAIT);
 
 out:
 	scm_destroy(siocb->scm);
@@ -1482,7 +1482,7 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock,
 		struct sockaddr_nl *addr = (struct sockaddr_nl *)msg->msg_name;
 		addr->nl_family = AF_NETLINK;
 		addr->nl_pad    = 0;
-		addr->nl_pid	= NETLINK_CB(skb).pid;
+		addr->nl_pid	= NETLINK_CB(skb).portid;
 		addr->nl_groups	= netlink_group_mask(NETLINK_CB(skb).dst_group);
 		msg->msg_namelen = sizeof(*addr);
 	}
@@ -1683,7 +1683,7 @@ void netlink_clear_multicast_users(struct sock *ksk, unsigned int group)
 }
 
 struct nlmsghdr *
-__nlmsg_put(struct sk_buff *skb, u32 pid, u32 seq, int type, int len, int flags)
+__nlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, int type, int len, int flags)
 {
 	struct nlmsghdr *nlh;
 	int size = NLMSG_LENGTH(len);
@@ -1692,7 +1692,7 @@ __nlmsg_put(struct sk_buff *skb, u32 pid, u32 seq, int type, int len, int flags)
 	nlh->nlmsg_type = type;
 	nlh->nlmsg_len = size;
 	nlh->nlmsg_flags = flags;
-	nlh->nlmsg_pid = pid;
+	nlh->nlmsg_pid = portid;
 	nlh->nlmsg_seq = seq;
 	if (!__builtin_constant_p(size) || NLMSG_ALIGN(size) - size != 0)
 		memset(NLMSG_DATA(nlh) + len, 0, NLMSG_ALIGN(size) - size);
@@ -1788,7 +1788,7 @@ int netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
 	atomic_inc(&skb->users);
 	cb->skb = skb;
 
-	sk = netlink_lookup(sock_net(ssk), ssk->sk_protocol, NETLINK_CB(skb).pid);
+	sk = netlink_lookup(sock_net(ssk), ssk->sk_protocol, NETLINK_CB(skb).portid);
 	if (sk == NULL) {
 		netlink_destroy_callback(cb);
 		return -ECONNREFUSED;
@@ -1836,7 +1836,7 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err)
 
 		sk = netlink_lookup(sock_net(in_skb->sk),
 				    in_skb->sk->sk_protocol,
-				    NETLINK_CB(in_skb).pid);
+				    NETLINK_CB(in_skb).portid);
 		if (sk) {
 			sk->sk_err = ENOBUFS;
 			sk->sk_error_report(sk);
@@ -1845,12 +1845,12 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err)
 		return;
 	}
 
-	rep = __nlmsg_put(skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq,
+	rep = __nlmsg_put(skb, NETLINK_CB(in_skb).portid, nlh->nlmsg_seq,
 			  NLMSG_ERROR, payload, 0);
 	errmsg = nlmsg_data(rep);
 	errmsg->error = err;
 	memcpy(&errmsg->msg, nlh, err ? nlh->nlmsg_len : sizeof(*nlh));
-	netlink_unicast(in_skb->sk, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
+	netlink_unicast(in_skb->sk, skb, NETLINK_CB(in_skb).portid, MSG_DONTWAIT);
 }
 EXPORT_SYMBOL(netlink_ack);
 
@@ -1900,33 +1900,33 @@ EXPORT_SYMBOL(netlink_rcv_skb);
  * nlmsg_notify - send a notification netlink message
  * @sk: netlink socket to use
  * @skb: notification message
- * @pid: destination netlink pid for reports or 0
+ * @portid: destination netlink portid for reports or 0
  * @group: destination multicast group or 0
  * @report: 1 to report back, 0 to disable
  * @flags: allocation flags
  */
-int nlmsg_notify(struct sock *sk, struct sk_buff *skb, u32 pid,
+int nlmsg_notify(struct sock *sk, struct sk_buff *skb, u32 portid,
 		 unsigned int group, int report, gfp_t flags)
 {
 	int err = 0;
 
 	if (group) {
-		int exclude_pid = 0;
+		int exclude_portid = 0;
 
 		if (report) {
 			atomic_inc(&skb->users);
-			exclude_pid = pid;
+			exclude_portid = portid;
 		}
 
 		/* errors reported via destination sk->sk_err, but propagate
 		 * delivery errors if NETLINK_BROADCAST_ERROR flag is set */
-		err = nlmsg_multicast(sk, skb, exclude_pid, group, flags);
+		err = nlmsg_multicast(sk, skb, exclude_portid, group, flags);
 	}
 
 	if (report) {
 		int err2;
 
-		err2 = nlmsg_unicast(sk, skb, pid);
+		err2 = nlmsg_unicast(sk, skb, portid);
 		if (!err || err == -ESRCH)
 			err = err2;
 	}
@@ -1951,7 +1951,7 @@ static struct sock *netlink_seq_socket_idx(struct seq_file *seq, loff_t pos)
 	loff_t off = 0;
 
 	for (i = 0; i < MAX_LINKS; i++) {
-		struct nl_pid_hash *hash = &nl_table[i].hash;
+		struct nl_portid_hash *hash = &nl_table[i].hash;
 
 		for (j = 0; j <= hash->mask; j++) {
 			sk_for_each(s, node, &hash->table[j]) {
@@ -1999,7 +1999,7 @@ static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 	j = iter->hash_idx + 1;
 
 	do {
-		struct nl_pid_hash *hash = &nl_table[i].hash;
+		struct nl_portid_hash *hash = &nl_table[i].hash;
 
 		for (; j <= hash->mask; j++) {
 			s = sk_head(&hash->table[j]);
@@ -2038,7 +2038,7 @@ static int netlink_seq_show(struct seq_file *seq, void *v)
 		seq_printf(seq, "%pK %-3d %-6d %08x %-8d %-8d %pK %-8d %-8d %-8lu\n",
 			   s,
 			   s->sk_protocol,
-			   nlk->pid,
+			   nlk->portid,
 			   nlk->groups ? (u32)nlk->groups[0] : 0,
 			   sk_rmem_alloc_get(s),
 			   sk_wmem_alloc_get(s),
@@ -2183,12 +2183,12 @@ static int __init netlink_proto_init(void)
 	order = get_bitmask_order(min(limit, (unsigned long)UINT_MAX)) - 1;
 
 	for (i = 0; i < MAX_LINKS; i++) {
-		struct nl_pid_hash *hash = &nl_table[i].hash;
+		struct nl_portid_hash *hash = &nl_table[i].hash;
 
-		hash->table = nl_pid_hash_zalloc(1 * sizeof(*hash->table));
+		hash->table = nl_portid_hash_zalloc(1 * sizeof(*hash->table));
 		if (!hash->table) {
 			while (i-- > 0)
-				nl_pid_hash_free(nl_table[i].hash.table,
+				nl_portid_hash_free(nl_table[i].hash.table,
 						 1 * sizeof(*hash->table));
 			kfree(nl_table);
 			goto panic;
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index 19288b7d6135..f2aabb6f4105 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -501,7 +501,7 @@ EXPORT_SYMBOL(genl_unregister_family);
 /**
  * genlmsg_put - Add generic netlink header to netlink message
  * @skb: socket buffer holding the message
- * @pid: netlink pid the message is addressed to
+ * @portid: netlink portid the message is addressed to
  * @seq: sequence number (usually the one of the sender)
  * @family: generic netlink family
  * @flags: netlink message flags
@@ -509,13 +509,13 @@ EXPORT_SYMBOL(genl_unregister_family);
  *
  * Returns pointer to user specific header
  */
-void *genlmsg_put(struct sk_buff *skb, u32 pid, u32 seq,
+void *genlmsg_put(struct sk_buff *skb, u32 portid, u32 seq,
 				struct genl_family *family, int flags, u8 cmd)
 {
 	struct nlmsghdr *nlh;
 	struct genlmsghdr *hdr;
 
-	nlh = nlmsg_put(skb, pid, seq, family->id, GENL_HDRLEN +
+	nlh = nlmsg_put(skb, portid, seq, family->id, GENL_HDRLEN +
 			family->hdrsize, flags);
 	if (nlh == NULL)
 		return NULL;
@@ -585,7 +585,7 @@ static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 	}
 
 	info.snd_seq = nlh->nlmsg_seq;
-	info.snd_pid = NETLINK_CB(skb).pid;
+	info.snd_portid = NETLINK_CB(skb).portid;
 	info.nlhdr = nlh;
 	info.genlhdr = nlmsg_data(nlh);
 	info.userhdr = nlmsg_data(nlh) + GENL_HDRLEN;
@@ -626,12 +626,12 @@ static struct genl_family genl_ctrl = {
 	.netnsok = true,
 };
 
-static int ctrl_fill_info(struct genl_family *family, u32 pid, u32 seq,
+static int ctrl_fill_info(struct genl_family *family, u32 portid, u32 seq,
 			  u32 flags, struct sk_buff *skb, u8 cmd)
 {
 	void *hdr;
 
-	hdr = genlmsg_put(skb, pid, seq, &genl_ctrl, flags, cmd);
+	hdr = genlmsg_put(skb, portid, seq, &genl_ctrl, flags, cmd);
 	if (hdr == NULL)
 		return -1;
 
@@ -701,7 +701,7 @@ nla_put_failure:
 	return -EMSGSIZE;
 }
 
-static int ctrl_fill_mcgrp_info(struct genl_multicast_group *grp, u32 pid,
+static int ctrl_fill_mcgrp_info(struct genl_multicast_group *grp, u32 portid,
 				u32 seq, u32 flags, struct sk_buff *skb,
 				u8 cmd)
 {
@@ -709,7 +709,7 @@ static int ctrl_fill_mcgrp_info(struct genl_multicast_group *grp, u32 pid,
 	struct nlattr *nla_grps;
 	struct nlattr *nest;
 
-	hdr = genlmsg_put(skb, pid, seq, &genl_ctrl, flags, cmd);
+	hdr = genlmsg_put(skb, portid, seq, &genl_ctrl, flags, cmd);
 	if (hdr == NULL)
 		return -1;
 
@@ -756,7 +756,7 @@ static int ctrl_dumpfamily(struct sk_buff *skb, struct netlink_callback *cb)
 				continue;
 			if (++n < fams_to_skip)
 				continue;
-			if (ctrl_fill_info(rt, NETLINK_CB(cb->skb).pid,
+			if (ctrl_fill_info(rt, NETLINK_CB(cb->skb).portid,
 					   cb->nlh->nlmsg_seq, NLM_F_MULTI,
 					   skb, CTRL_CMD_NEWFAMILY) < 0)
 				goto errout;
@@ -773,7 +773,7 @@ errout:
 }
 
 static struct sk_buff *ctrl_build_family_msg(struct genl_family *family,
-					     u32 pid, int seq, u8 cmd)
+					     u32 portid, int seq, u8 cmd)
 {
 	struct sk_buff *skb;
 	int err;
@@ -782,7 +782,7 @@ static struct sk_buff *ctrl_build_family_msg(struct genl_family *family,
 	if (skb == NULL)
 		return ERR_PTR(-ENOBUFS);
 
-	err = ctrl_fill_info(family, pid, seq, 0, skb, cmd);
+	err = ctrl_fill_info(family, portid, seq, 0, skb, cmd);
 	if (err < 0) {
 		nlmsg_free(skb);
 		return ERR_PTR(err);
@@ -792,7 +792,7 @@ static struct sk_buff *ctrl_build_family_msg(struct genl_family *family,
 }
 
 static struct sk_buff *ctrl_build_mcgrp_msg(struct genl_multicast_group *grp,
-					    u32 pid, int seq, u8 cmd)
+					    u32 portid, int seq, u8 cmd)
 {
 	struct sk_buff *skb;
 	int err;
@@ -801,7 +801,7 @@ static struct sk_buff *ctrl_build_mcgrp_msg(struct genl_multicast_group *grp,
 	if (skb == NULL)
 		return ERR_PTR(-ENOBUFS);
 
-	err = ctrl_fill_mcgrp_info(grp, pid, seq, 0, skb, cmd);
+	err = ctrl_fill_mcgrp_info(grp, portid, seq, 0, skb, cmd);
 	if (err < 0) {
 		nlmsg_free(skb);
 		return ERR_PTR(err);
@@ -853,7 +853,7 @@ static int ctrl_getfamily(struct sk_buff *skb, struct genl_info *info)
 		return -ENOENT;
 	}
 
-	msg = ctrl_build_family_msg(res, info->snd_pid, info->snd_seq,
+	msg = ctrl_build_family_msg(res, info->snd_portid, info->snd_seq,
 				    CTRL_CMD_NEWFAMILY);
 	if (IS_ERR(msg))
 		return PTR_ERR(msg);
@@ -971,7 +971,7 @@ problem:
 
 subsys_initcall(genl_init);
 
-static int genlmsg_mcast(struct sk_buff *skb, u32 pid, unsigned long group,
+static int genlmsg_mcast(struct sk_buff *skb, u32 portid, unsigned long group,
 			 gfp_t flags)
 {
 	struct sk_buff *tmp;
@@ -986,7 +986,7 @@ static int genlmsg_mcast(struct sk_buff *skb, u32 pid, unsigned long group,
 				goto error;
 			}
 			err = nlmsg_multicast(prev->genl_sock, tmp,
-					      pid, group, flags);
+					      portid, group, flags);
 			if (err)
 				goto error;
 		}
@@ -994,20 +994,20 @@ static int genlmsg_mcast(struct sk_buff *skb, u32 pid, unsigned long group,
 		prev = net;
 	}
 
-	return nlmsg_multicast(prev->genl_sock, skb, pid, group, flags);
+	return nlmsg_multicast(prev->genl_sock, skb, portid, group, flags);
  error:
 	kfree_skb(skb);
 	return err;
 }
 
-int genlmsg_multicast_allns(struct sk_buff *skb, u32 pid, unsigned int group,
+int genlmsg_multicast_allns(struct sk_buff *skb, u32 portid, unsigned int group,
 			    gfp_t flags)
 {
-	return genlmsg_mcast(skb, pid, group, flags);
+	return genlmsg_mcast(skb, portid, group, flags);
 }
 EXPORT_SYMBOL(genlmsg_multicast_allns);
 
-void genl_notify(struct sk_buff *skb, struct net *net, u32 pid, u32 group,
+void genl_notify(struct sk_buff *skb, struct net *net, u32 portid, u32 group,
 		 struct nlmsghdr *nlh, gfp_t flags)
 {
 	struct sock *sk = net->genl_sock;
@@ -1016,6 +1016,6 @@ void genl_notify(struct sk_buff *skb, struct net *net, u32 pid, u32 group,
 	if (nlh)
 		report = nlmsg_report(nlh);
 
-	nlmsg_notify(sk, skb, pid, group, report, flags);
+	nlmsg_notify(sk, skb, portid, group, report, flags);
 }
 EXPORT_SYMBOL(genl_notify);
diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c
index 4c51714ee741..4bbb70e32d1e 100644
--- a/net/nfc/netlink.c
+++ b/net/nfc/netlink.c
@@ -58,7 +58,7 @@ static int nfc_genl_send_target(struct sk_buff *msg, struct nfc_target *target,
 {
 	void *hdr;
 
-	hdr = genlmsg_put(msg, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
+	hdr = genlmsg_put(msg, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
 			  &nfc_genl_family, flags, NFC_CMD_GET_TARGET);
 	if (!hdr)
 		return -EMSGSIZE;
@@ -165,7 +165,7 @@ int nfc_genl_targets_found(struct nfc_dev *dev)
 	struct sk_buff *msg;
 	void *hdr;
 
-	dev->genl_data.poll_req_pid = 0;
+	dev->genl_data.poll_req_portid = 0;
 
 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
 	if (!msg)
@@ -347,13 +347,13 @@ free_msg:
 }
 
 static int nfc_genl_send_device(struct sk_buff *msg, struct nfc_dev *dev,
-				u32 pid, u32 seq,
+				u32 portid, u32 seq,
 				struct netlink_callback *cb,
 				int flags)
 {
 	void *hdr;
 
-	hdr = genlmsg_put(msg, pid, seq, &nfc_genl_family, flags,
+	hdr = genlmsg_put(msg, portid, seq, &nfc_genl_family, flags,
 			  NFC_CMD_GET_DEVICE);
 	if (!hdr)
 		return -EMSGSIZE;
@@ -401,7 +401,7 @@ static int nfc_genl_dump_devices(struct sk_buff *skb,
 	while (dev) {
 		int rc;
 
-		rc = nfc_genl_send_device(skb, dev, NETLINK_CB(cb->skb).pid,
+		rc = nfc_genl_send_device(skb, dev, NETLINK_CB(cb->skb).portid,
 					  cb->nlh->nlmsg_seq, cb, NLM_F_MULTI);
 		if (rc < 0)
 			break;
@@ -520,7 +520,7 @@ static int nfc_genl_get_device(struct sk_buff *skb, struct genl_info *info)
 		goto out_putdev;
 	}
 
-	rc = nfc_genl_send_device(msg, dev, info->snd_pid, info->snd_seq,
+	rc = nfc_genl_send_device(msg, dev, info->snd_portid, info->snd_seq,
 				  NULL, 0);
 	if (rc < 0)
 		goto out_free;
@@ -611,7 +611,7 @@ static int nfc_genl_start_poll(struct sk_buff *skb, struct genl_info *info)
 
 	rc = nfc_start_poll(dev, im_protocols, tm_protocols);
 	if (!rc)
-		dev->genl_data.poll_req_pid = info->snd_pid;
+		dev->genl_data.poll_req_portid = info->snd_portid;
 
 	mutex_unlock(&dev->genl_data.genl_data_mutex);
 
@@ -645,13 +645,13 @@ static int nfc_genl_stop_poll(struct sk_buff *skb, struct genl_info *info)
 
 	mutex_lock(&dev->genl_data.genl_data_mutex);
 
-	if (dev->genl_data.poll_req_pid != info->snd_pid) {
+	if (dev->genl_data.poll_req_portid != info->snd_portid) {
 		rc = -EBUSY;
 		goto out;
 	}
 
 	rc = nfc_stop_poll(dev);
-	dev->genl_data.poll_req_pid = 0;
+	dev->genl_data.poll_req_portid = 0;
 
 out:
 	mutex_unlock(&dev->genl_data.genl_data_mutex);
@@ -771,15 +771,15 @@ static int nfc_genl_rcv_nl_event(struct notifier_block *this,
 	if (event != NETLINK_URELEASE || n->protocol != NETLINK_GENERIC)
 		goto out;
 
-	pr_debug("NETLINK_URELEASE event from id %d\n", n->pid);
+	pr_debug("NETLINK_URELEASE event from id %d\n", n->portid);
 
 	nfc_device_iter_init(&iter);
 	dev = nfc_device_iter_next(&iter);
 
 	while (dev) {
-		if (dev->genl_data.poll_req_pid == n->pid) {
+		if (dev->genl_data.poll_req_portid == n->portid) {
 			nfc_stop_poll(dev);
-			dev->genl_data.poll_req_pid = 0;
+			dev->genl_data.poll_req_portid = 0;
 		}
 		dev = nfc_device_iter_next(&iter);
 	}
@@ -792,7 +792,7 @@ out:
 
 void nfc_genl_data_init(struct nfc_genl_data *genl_data)
 {
-	genl_data->poll_req_pid = 0;
+	genl_data->poll_req_portid = 0;
 	mutex_init(&genl_data->genl_data_mutex);
 }
 
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index 0da687769f56..c7425f32e7db 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -286,7 +286,7 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb,
 	upcall.cmd = OVS_PACKET_CMD_ACTION;
 	upcall.key = &OVS_CB(skb)->flow->key;
 	upcall.userdata = NULL;
-	upcall.pid = 0;
+	upcall.portid = 0;
 
 	for (a = nla_data(attr), rem = nla_len(attr); rem > 0;
 		 a = nla_next(a, &rem)) {
@@ -296,7 +296,7 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb,
 			break;
 
 		case OVS_USERSPACE_ATTR_PID:
-			upcall.pid = nla_get_u32(a);
+			upcall.portid = nla_get_u32(a);
 			break;
 		}
 	}
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 105a0b5adc51..56327e877ed9 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -225,7 +225,7 @@ void ovs_dp_process_received_packet(struct vport *p, struct sk_buff *skb)
 		upcall.cmd = OVS_PACKET_CMD_MISS;
 		upcall.key = &key;
 		upcall.userdata = NULL;
-		upcall.pid = p->upcall_pid;
+		upcall.portid = p->upcall_portid;
 		ovs_dp_upcall(dp, skb, &upcall);
 		consume_skb(skb);
 		stats_counter = &stats->n_missed;
@@ -261,7 +261,7 @@ int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb,
 	int dp_ifindex;
 	int err;
 
-	if (upcall_info->pid == 0) {
+	if (upcall_info->portid == 0) {
 		err = -ENOTCONN;
 		goto err;
 	}
@@ -395,7 +395,7 @@ static int queue_userspace_packet(struct net *net, int dp_ifindex,
 
 	skb_copy_and_csum_dev(skb, nla_data(nla));
 
-	err = genlmsg_unicast(net, user_skb, upcall_info->pid);
+	err = genlmsg_unicast(net, user_skb, upcall_info->portid);
 
 out:
 	kfree_skb(nskb);
@@ -780,7 +780,7 @@ static struct genl_multicast_group ovs_dp_flow_multicast_group = {
 
 /* Called with genl_lock. */
 static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp,
-				  struct sk_buff *skb, u32 pid,
+				  struct sk_buff *skb, u32 portid,
 				  u32 seq, u32 flags, u8 cmd)
 {
 	const int skb_orig_len = skb->len;
@@ -795,7 +795,7 @@ static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp,
 	sf_acts = rcu_dereference_protected(flow->sf_acts,
 					    lockdep_genl_is_held());
 
-	ovs_header = genlmsg_put(skb, pid, seq, &dp_flow_genl_family, flags, cmd);
+	ovs_header = genlmsg_put(skb, portid, seq, &dp_flow_genl_family, flags, cmd);
 	if (!ovs_header)
 		return -EMSGSIZE;
 
@@ -879,7 +879,7 @@ static struct sk_buff *ovs_flow_cmd_alloc_info(struct sw_flow *flow)
 
 static struct sk_buff *ovs_flow_cmd_build_info(struct sw_flow *flow,
 					       struct datapath *dp,
-					       u32 pid, u32 seq, u8 cmd)
+					       u32 portid, u32 seq, u8 cmd)
 {
 	struct sk_buff *skb;
 	int retval;
@@ -888,7 +888,7 @@ static struct sk_buff *ovs_flow_cmd_build_info(struct sw_flow *flow,
 	if (!skb)
 		return ERR_PTR(-ENOMEM);
 
-	retval = ovs_flow_cmd_fill_info(flow, dp, skb, pid, seq, 0, cmd);
+	retval = ovs_flow_cmd_fill_info(flow, dp, skb, portid, seq, 0, cmd);
 	BUG_ON(retval < 0);
 	return skb;
 }
@@ -970,7 +970,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
 		flow->hash = ovs_flow_hash(&key, key_len);
 		ovs_flow_tbl_insert(table, flow);
 
-		reply = ovs_flow_cmd_build_info(flow, dp, info->snd_pid,
+		reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid,
 						info->snd_seq,
 						OVS_FLOW_CMD_NEW);
 	} else {
@@ -1008,7 +1008,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
 			ovs_flow_deferred_free_acts(old_acts);
 		}
 
-		reply = ovs_flow_cmd_build_info(flow, dp, info->snd_pid,
+		reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid,
 					       info->snd_seq, OVS_FLOW_CMD_NEW);
 
 		/* Clear stats. */
@@ -1020,7 +1020,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
 	}
 
 	if (!IS_ERR(reply))
-		genl_notify(reply, genl_info_net(info), info->snd_pid,
+		genl_notify(reply, genl_info_net(info), info->snd_portid,
 			   ovs_dp_flow_multicast_group.id, info->nlhdr,
 			   GFP_KERNEL);
 	else
@@ -1061,7 +1061,7 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
 	if (!flow)
 		return -ENOENT;
 
-	reply = ovs_flow_cmd_build_info(flow, dp, info->snd_pid,
+	reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid,
 					info->snd_seq, OVS_FLOW_CMD_NEW);
 	if (IS_ERR(reply))
 		return PTR_ERR(reply);
@@ -1103,13 +1103,13 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
 
 	ovs_flow_tbl_remove(table, flow);
 
-	err = ovs_flow_cmd_fill_info(flow, dp, reply, info->snd_pid,
+	err = ovs_flow_cmd_fill_info(flow, dp, reply, info->snd_portid,
 				     info->snd_seq, 0, OVS_FLOW_CMD_DEL);
 	BUG_ON(err < 0);
 
 	ovs_flow_deferred_free(flow);
 
-	genl_notify(reply, genl_info_net(info), info->snd_pid,
+	genl_notify(reply, genl_info_net(info), info->snd_portid,
 		    ovs_dp_flow_multicast_group.id, info->nlhdr, GFP_KERNEL);
 	return 0;
 }
@@ -1137,7 +1137,7 @@ static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
 			break;
 
 		if (ovs_flow_cmd_fill_info(flow, dp, skb,
-					   NETLINK_CB(cb->skb).pid,
+					   NETLINK_CB(cb->skb).portid,
 					   cb->nlh->nlmsg_seq, NLM_F_MULTI,
 					   OVS_FLOW_CMD_NEW) < 0)
 			break;
@@ -1191,13 +1191,13 @@ static struct genl_multicast_group ovs_dp_datapath_multicast_group = {
 };
 
 static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb,
-				u32 pid, u32 seq, u32 flags, u8 cmd)
+				u32 portid, u32 seq, u32 flags, u8 cmd)
 {
 	struct ovs_header *ovs_header;
 	struct ovs_dp_stats dp_stats;
 	int err;
 
-	ovs_header = genlmsg_put(skb, pid, seq, &dp_datapath_genl_family,
+	ovs_header = genlmsg_put(skb, portid, seq, &dp_datapath_genl_family,
 				   flags, cmd);
 	if (!ovs_header)
 		goto error;
@@ -1222,7 +1222,7 @@ error:
 	return -EMSGSIZE;
 }
 
-static struct sk_buff *ovs_dp_cmd_build_info(struct datapath *dp, u32 pid,
+static struct sk_buff *ovs_dp_cmd_build_info(struct datapath *dp, u32 portid,
 					     u32 seq, u8 cmd)
 {
 	struct sk_buff *skb;
@@ -1232,7 +1232,7 @@ static struct sk_buff *ovs_dp_cmd_build_info(struct datapath *dp, u32 pid,
 	if (!skb)
 		return ERR_PTR(-ENOMEM);
 
-	retval = ovs_dp_cmd_fill_info(dp, skb, pid, seq, 0, cmd);
+	retval = ovs_dp_cmd_fill_info(dp, skb, portid, seq, 0, cmd);
 	if (retval < 0) {
 		kfree_skb(skb);
 		return ERR_PTR(retval);
@@ -1311,7 +1311,7 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
 	parms.options = NULL;
 	parms.dp = dp;
 	parms.port_no = OVSP_LOCAL;
-	parms.upcall_pid = nla_get_u32(a[OVS_DP_ATTR_UPCALL_PID]);
+	parms.upcall_portid = nla_get_u32(a[OVS_DP_ATTR_UPCALL_PID]);
 
 	vport = new_vport(&parms);
 	if (IS_ERR(vport)) {
@@ -1322,7 +1322,7 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
 		goto err_destroy_ports_array;
 	}
 
-	reply = ovs_dp_cmd_build_info(dp, info->snd_pid,
+	reply = ovs_dp_cmd_build_info(dp, info->snd_portid,
 				      info->snd_seq, OVS_DP_CMD_NEW);
 	err = PTR_ERR(reply);
 	if (IS_ERR(reply))
@@ -1332,7 +1332,7 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
 	list_add_tail(&dp->list_node, &ovs_net->dps);
 	rtnl_unlock();
 
-	genl_notify(reply, genl_info_net(info), info->snd_pid,
+	genl_notify(reply, genl_info_net(info), info->snd_portid,
 		    ovs_dp_datapath_multicast_group.id, info->nlhdr,
 		    GFP_KERNEL);
 	return 0;
@@ -1394,7 +1394,7 @@ static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info)
 	if (IS_ERR(dp))
 		return err;
 
-	reply = ovs_dp_cmd_build_info(dp, info->snd_pid,
+	reply = ovs_dp_cmd_build_info(dp, info->snd_portid,
 				      info->snd_seq, OVS_DP_CMD_DEL);
 	err = PTR_ERR(reply);
 	if (IS_ERR(reply))
@@ -1402,7 +1402,7 @@ static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info)
 
 	__dp_destroy(dp);
 
-	genl_notify(reply, genl_info_net(info), info->snd_pid,
+	genl_notify(reply, genl_info_net(info), info->snd_portid,
 		    ovs_dp_datapath_multicast_group.id, info->nlhdr,
 		    GFP_KERNEL);
 
@@ -1419,7 +1419,7 @@ static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info)
 	if (IS_ERR(dp))
 		return PTR_ERR(dp);
 
-	reply = ovs_dp_cmd_build_info(dp, info->snd_pid,
+	reply = ovs_dp_cmd_build_info(dp, info->snd_portid,
 				      info->snd_seq, OVS_DP_CMD_NEW);
 	if (IS_ERR(reply)) {
 		err = PTR_ERR(reply);
@@ -1428,7 +1428,7 @@ static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info)
 		return 0;
 	}
 
-	genl_notify(reply, genl_info_net(info), info->snd_pid,
+	genl_notify(reply, genl_info_net(info), info->snd_portid,
 		    ovs_dp_datapath_multicast_group.id, info->nlhdr,
 		    GFP_KERNEL);
 
@@ -1444,7 +1444,7 @@ static int ovs_dp_cmd_get(struct sk_buff *skb, struct genl_info *info)
 	if (IS_ERR(dp))
 		return PTR_ERR(dp);
 
-	reply = ovs_dp_cmd_build_info(dp, info->snd_pid,
+	reply = ovs_dp_cmd_build_info(dp, info->snd_portid,
 				      info->snd_seq, OVS_DP_CMD_NEW);
 	if (IS_ERR(reply))
 		return PTR_ERR(reply);
@@ -1461,7 +1461,7 @@ static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
 
 	list_for_each_entry(dp, &ovs_net->dps, list_node) {
 		if (i >= skip &&
-		    ovs_dp_cmd_fill_info(dp, skb, NETLINK_CB(cb->skb).pid,
+		    ovs_dp_cmd_fill_info(dp, skb, NETLINK_CB(cb->skb).portid,
 					 cb->nlh->nlmsg_seq, NLM_F_MULTI,
 					 OVS_DP_CMD_NEW) < 0)
 			break;
@@ -1521,13 +1521,13 @@ struct genl_multicast_group ovs_dp_vport_multicast_group = {
 
 /* Called with RTNL lock or RCU read lock. */
 static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
-				   u32 pid, u32 seq, u32 flags, u8 cmd)
+				   u32 portid, u32 seq, u32 flags, u8 cmd)
 {
 	struct ovs_header *ovs_header;
 	struct ovs_vport_stats vport_stats;
 	int err;
 
-	ovs_header = genlmsg_put(skb, pid, seq, &dp_vport_genl_family,
+	ovs_header = genlmsg_put(skb, portid, seq, &dp_vport_genl_family,
 				 flags, cmd);
 	if (!ovs_header)
 		return -EMSGSIZE;
@@ -1537,7 +1537,7 @@ static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
 	if (nla_put_u32(skb, OVS_VPORT_ATTR_PORT_NO, vport->port_no) ||
 	    nla_put_u32(skb, OVS_VPORT_ATTR_TYPE, vport->ops->type) ||
 	    nla_put_string(skb, OVS_VPORT_ATTR_NAME, vport->ops->get_name(vport)) ||
-	    nla_put_u32(skb, OVS_VPORT_ATTR_UPCALL_PID, vport->upcall_pid))
+	    nla_put_u32(skb, OVS_VPORT_ATTR_UPCALL_PID, vport->upcall_portid))
 		goto nla_put_failure;
 
 	ovs_vport_get_stats(vport, &vport_stats);
@@ -1559,7 +1559,7 @@ error:
 }
 
 /* Called with RTNL lock or RCU read lock. */
-struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, u32 pid,
+struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, u32 portid,
 					 u32 seq, u8 cmd)
 {
 	struct sk_buff *skb;
@@ -1569,7 +1569,7 @@ struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, u32 pid,
 	if (!skb)
 		return ERR_PTR(-ENOMEM);
 
-	retval = ovs_vport_cmd_fill_info(vport, skb, pid, seq, 0, cmd);
+	retval = ovs_vport_cmd_fill_info(vport, skb, portid, seq, 0, cmd);
 	if (retval < 0) {
 		kfree_skb(skb);
 		return ERR_PTR(retval);
@@ -1661,21 +1661,21 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
 	parms.options = a[OVS_VPORT_ATTR_OPTIONS];
 	parms.dp = dp;
 	parms.port_no = port_no;
-	parms.upcall_pid = nla_get_u32(a[OVS_VPORT_ATTR_UPCALL_PID]);
+	parms.upcall_portid = nla_get_u32(a[OVS_VPORT_ATTR_UPCALL_PID]);
 
 	vport = new_vport(&parms);
 	err = PTR_ERR(vport);
 	if (IS_ERR(vport))
 		goto exit_unlock;
 
-	reply = ovs_vport_cmd_build_info(vport, info->snd_pid, info->snd_seq,
+	reply = ovs_vport_cmd_build_info(vport, info->snd_portid, info->snd_seq,
 					 OVS_VPORT_CMD_NEW);
 	if (IS_ERR(reply)) {
 		err = PTR_ERR(reply);
 		ovs_dp_detach_port(vport);
 		goto exit_unlock;
 	}
-	genl_notify(reply, genl_info_net(info), info->snd_pid,
+	genl_notify(reply, genl_info_net(info), info->snd_portid,
 		    ovs_dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL);
 
 exit_unlock:
@@ -1707,9 +1707,9 @@ static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info)
 	if (err)
 		goto exit_unlock;
 	if (a[OVS_VPORT_ATTR_UPCALL_PID])
-		vport->upcall_pid = nla_get_u32(a[OVS_VPORT_ATTR_UPCALL_PID]);
+		vport->upcall_portid = nla_get_u32(a[OVS_VPORT_ATTR_UPCALL_PID]);
 
-	reply = ovs_vport_cmd_build_info(vport, info->snd_pid, info->snd_seq,
+	reply = ovs_vport_cmd_build_info(vport, info->snd_portid, info->snd_seq,
 					 OVS_VPORT_CMD_NEW);
 	if (IS_ERR(reply)) {
 		netlink_set_err(sock_net(skb->sk)->genl_sock, 0,
@@ -1717,7 +1717,7 @@ static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info)
 		goto exit_unlock;
 	}
 
-	genl_notify(reply, genl_info_net(info), info->snd_pid,
+	genl_notify(reply, genl_info_net(info), info->snd_portid,
 		    ovs_dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL);
 
 exit_unlock:
@@ -1743,7 +1743,7 @@ static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)
 		goto exit_unlock;
 	}
 
-	reply = ovs_vport_cmd_build_info(vport, info->snd_pid, info->snd_seq,
+	reply = ovs_vport_cmd_build_info(vport, info->snd_portid, info->snd_seq,
 					 OVS_VPORT_CMD_DEL);
 	err = PTR_ERR(reply);
 	if (IS_ERR(reply))
@@ -1751,7 +1751,7 @@ static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)
 
 	ovs_dp_detach_port(vport);
 
-	genl_notify(reply, genl_info_net(info), info->snd_pid,
+	genl_notify(reply, genl_info_net(info), info->snd_portid,
 		    ovs_dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL);
 
 exit_unlock:
@@ -1773,7 +1773,7 @@ static int ovs_vport_cmd_get(struct sk_buff *skb, struct genl_info *info)
 	if (IS_ERR(vport))
 		goto exit_unlock;
 
-	reply = ovs_vport_cmd_build_info(vport, info->snd_pid, info->snd_seq,
+	reply = ovs_vport_cmd_build_info(vport, info->snd_portid, info->snd_seq,
 					 OVS_VPORT_CMD_NEW);
 	err = PTR_ERR(reply);
 	if (IS_ERR(reply))
@@ -1808,7 +1808,7 @@ static int ovs_vport_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
 		hlist_for_each_entry_rcu(vport, n, &dp->ports[i], dp_hash_node) {
 			if (j >= skip &&
 			    ovs_vport_cmd_fill_info(vport, skb,
-						    NETLINK_CB(cb->skb).pid,
+						    NETLINK_CB(cb->skb).portid,
 						    cb->nlh->nlmsg_seq,
 						    NLM_F_MULTI,
 						    OVS_VPORT_CMD_NEW) < 0)
diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
index 129ec5480758..031dfbf37c93 100644
--- a/net/openvswitch/datapath.h
+++ b/net/openvswitch/datapath.h
@@ -129,7 +129,7 @@ struct dp_upcall_info {
 	u8 cmd;
 	const struct sw_flow_key *key;
 	const struct nlattr *userdata;
-	u32 pid;
+	u32 portid;
 };
 
 static inline struct net *ovs_dp_get_net(struct datapath *dp)
diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c
index 1abd9609ba78..9055dd698c70 100644
--- a/net/openvswitch/vport.c
+++ b/net/openvswitch/vport.c
@@ -125,7 +125,7 @@ struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *ops,
 
 	vport->dp = parms->dp;
 	vport->port_no = parms->port_no;
-	vport->upcall_pid = parms->upcall_pid;
+	vport->upcall_portid = parms->upcall_portid;
 	vport->ops = ops;
 	INIT_HLIST_NODE(&vport->dp_hash_node);
 
diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h
index c56e4836e93b..3f7961ea3c56 100644
--- a/net/openvswitch/vport.h
+++ b/net/openvswitch/vport.h
@@ -70,7 +70,7 @@ struct vport_err_stats {
  * @rcu: RCU callback head for deferred destruction.
  * @port_no: Index into @dp's @ports array.
  * @dp: Datapath to which this port belongs.
- * @upcall_pid: The Netlink port to use for packets received on this port that
+ * @upcall_portid: The Netlink port to use for packets received on this port that
  * miss the flow table.
  * @hash_node: Element in @dev_table hash table in vport.c.
  * @dp_hash_node: Element in @datapath->ports hash table in datapath.c.
@@ -83,7 +83,7 @@ struct vport {
 	struct rcu_head rcu;
 	u16 port_no;
 	struct datapath	*dp;
-	u32 upcall_pid;
+	u32 upcall_portid;
 
 	struct hlist_node hash_node;
 	struct hlist_node dp_hash_node;
@@ -113,7 +113,7 @@ struct vport_parms {
 	/* For ovs_vport_alloc(). */
 	struct datapath *dp;
 	u16 port_no;
-	u32 upcall_pid;
+	u32 upcall_portid;
 };
 
 /**
diff --git a/net/packet/diag.c b/net/packet/diag.c
index 39bce0d50df9..8db6e21c46bd 100644
--- a/net/packet/diag.c
+++ b/net/packet/diag.c
@@ -126,13 +126,13 @@ static int pdiag_put_fanout(struct packet_sock *po, struct sk_buff *nlskb)
 }
 
 static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct packet_diag_req *req,
-		u32 pid, u32 seq, u32 flags, int sk_ino)
+		u32 portid, u32 seq, u32 flags, int sk_ino)
 {
 	struct nlmsghdr *nlh;
 	struct packet_diag_msg *rp;
 	struct packet_sock *po = pkt_sk(sk);
 
-	nlh = nlmsg_put(skb, pid, seq, SOCK_DIAG_BY_FAMILY, sizeof(*rp), flags);
+	nlh = nlmsg_put(skb, portid, seq, SOCK_DIAG_BY_FAMILY, sizeof(*rp), flags);
 	if (!nlh)
 		return -EMSGSIZE;
 
@@ -184,7 +184,7 @@ static int packet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
 		if (num < s_num)
 			goto next;
 
-		if (sk_diag_fill(sk, skb, req, NETLINK_CB(cb->skb).pid,
+		if (sk_diag_fill(sk, skb, req, NETLINK_CB(cb->skb).portid,
 					cb->nlh->nlmsg_seq, NLM_F_MULTI,
 					sock_i_ino(sk)) < 0)
 			goto done;
diff --git a/net/phonet/pn_netlink.c b/net/phonet/pn_netlink.c
index 7dd762a464e5..83a8389619aa 100644
--- a/net/phonet/pn_netlink.c
+++ b/net/phonet/pn_netlink.c
@@ -33,7 +33,7 @@
 /* Device address handling */
 
 static int fill_addr(struct sk_buff *skb, struct net_device *dev, u8 addr,
-		     u32 pid, u32 seq, int event);
+		     u32 portid, u32 seq, int event);
 
 void phonet_address_notify(int event, struct net_device *dev, u8 addr)
 {
@@ -101,12 +101,12 @@ static int addr_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *attr)
 }
 
 static int fill_addr(struct sk_buff *skb, struct net_device *dev, u8 addr,
-			u32 pid, u32 seq, int event)
+			u32 portid, u32 seq, int event)
 {
 	struct ifaddrmsg *ifm;
 	struct nlmsghdr *nlh;
 
-	nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), 0);
+	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), 0);
 	if (nlh == NULL)
 		return -EMSGSIZE;
 
@@ -148,7 +148,7 @@ static int getaddr_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
 				continue;
 
 			if (fill_addr(skb, pnd->netdev, addr << 2,
-					 NETLINK_CB(cb->skb).pid,
+					 NETLINK_CB(cb->skb).portid,
 					cb->nlh->nlmsg_seq, RTM_NEWADDR) < 0)
 				goto out;
 		}
@@ -165,12 +165,12 @@ out:
 /* Routes handling */
 
 static int fill_route(struct sk_buff *skb, struct net_device *dev, u8 dst,
-			u32 pid, u32 seq, int event)
+			u32 portid, u32 seq, int event)
 {
 	struct rtmsg *rtm;
 	struct nlmsghdr *nlh;
 
-	nlh = nlmsg_put(skb, pid, seq, event, sizeof(*rtm), 0);
+	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*rtm), 0);
 	if (nlh == NULL)
 		return -EMSGSIZE;
 
@@ -276,7 +276,7 @@ static int route_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
 
 		if (addr_idx++ < addr_start_idx)
 			continue;
-		if (fill_route(skb, dev, addr << 2, NETLINK_CB(cb->skb).pid,
+		if (fill_route(skb, dev, addr << 2, NETLINK_CB(cb->skb).portid,
 				cb->nlh->nlmsg_seq, RTM_NEWROUTE))
 			goto out;
 	}
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index e3d2c78cb52c..102761d294cb 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -644,7 +644,7 @@ errout:
 }
 
 static int
-tca_get_fill(struct sk_buff *skb, struct tc_action *a, u32 pid, u32 seq,
+tca_get_fill(struct sk_buff *skb, struct tc_action *a, u32 portid, u32 seq,
 	     u16 flags, int event, int bind, int ref)
 {
 	struct tcamsg *t;
@@ -652,7 +652,7 @@ tca_get_fill(struct sk_buff *skb, struct tc_action *a, u32 pid, u32 seq,
 	unsigned char *b = skb_tail_pointer(skb);
 	struct nlattr *nest;
 
-	nlh = nlmsg_put(skb, pid, seq, event, sizeof(*t), flags);
+	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*t), flags);
 	if (!nlh)
 		goto out_nlmsg_trim;
 	t = nlmsg_data(nlh);
@@ -678,7 +678,7 @@ out_nlmsg_trim:
 }
 
 static int
-act_get_notify(struct net *net, u32 pid, struct nlmsghdr *n,
+act_get_notify(struct net *net, u32 portid, struct nlmsghdr *n,
 	       struct tc_action *a, int event)
 {
 	struct sk_buff *skb;
@@ -686,16 +686,16 @@ act_get_notify(struct net *net, u32 pid, struct nlmsghdr *n,
 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
 	if (!skb)
 		return -ENOBUFS;
-	if (tca_get_fill(skb, a, pid, n->nlmsg_seq, 0, event, 0, 0) <= 0) {
+	if (tca_get_fill(skb, a, portid, n->nlmsg_seq, 0, event, 0, 0) <= 0) {
 		kfree_skb(skb);
 		return -EINVAL;
 	}
 
-	return rtnl_unicast(skb, net, pid);
+	return rtnl_unicast(skb, net, portid);
 }
 
 static struct tc_action *
-tcf_action_get_1(struct nlattr *nla, struct nlmsghdr *n, u32 pid)
+tcf_action_get_1(struct nlattr *nla, struct nlmsghdr *n, u32 portid)
 {
 	struct nlattr *tb[TCA_ACT_MAX + 1];
 	struct tc_action *a;
@@ -762,7 +762,7 @@ static struct tc_action *create_a(int i)
 }
 
 static int tca_action_flush(struct net *net, struct nlattr *nla,
-			    struct nlmsghdr *n, u32 pid)
+			    struct nlmsghdr *n, u32 portid)
 {
 	struct sk_buff *skb;
 	unsigned char *b;
@@ -799,7 +799,7 @@ static int tca_action_flush(struct net *net, struct nlattr *nla,
 	if (a->ops == NULL)
 		goto err_out;
 
-	nlh = nlmsg_put(skb, pid, n->nlmsg_seq, RTM_DELACTION, sizeof(*t), 0);
+	nlh = nlmsg_put(skb, portid, n->nlmsg_seq, RTM_DELACTION, sizeof(*t), 0);
 	if (!nlh)
 		goto out_module_put;
 	t = nlmsg_data(nlh);
@@ -823,7 +823,7 @@ static int tca_action_flush(struct net *net, struct nlattr *nla,
 	nlh->nlmsg_flags |= NLM_F_ROOT;
 	module_put(a->ops->owner);
 	kfree(a);
-	err = rtnetlink_send(skb, net, pid, RTNLGRP_TC,
+	err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
 			     n->nlmsg_flags & NLM_F_ECHO);
 	if (err > 0)
 		return 0;
@@ -841,7 +841,7 @@ noflush_out:
 
 static int
 tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n,
-	      u32 pid, int event)
+	      u32 portid, int event)
 {
 	int i, ret;
 	struct nlattr *tb[TCA_ACT_MAX_PRIO + 1];
@@ -853,13 +853,13 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n,
 
 	if (event == RTM_DELACTION && n->nlmsg_flags & NLM_F_ROOT) {
 		if (tb[1] != NULL)
-			return tca_action_flush(net, tb[1], n, pid);
+			return tca_action_flush(net, tb[1], n, portid);
 		else
 			return -EINVAL;
 	}
 
 	for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) {
-		act = tcf_action_get_1(tb[i], n, pid);
+		act = tcf_action_get_1(tb[i], n, portid);
 		if (IS_ERR(act)) {
 			ret = PTR_ERR(act);
 			goto err;
@@ -874,7 +874,7 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n,
 	}
 
 	if (event == RTM_GETACTION)
-		ret = act_get_notify(net, pid, n, head, event);
+		ret = act_get_notify(net, portid, n, head, event);
 	else { /* delete */
 		struct sk_buff *skb;
 
@@ -884,7 +884,7 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n,
 			goto err;
 		}
 
-		if (tca_get_fill(skb, head, pid, n->nlmsg_seq, 0, event,
+		if (tca_get_fill(skb, head, portid, n->nlmsg_seq, 0, event,
 				 0, 1) <= 0) {
 			kfree_skb(skb);
 			ret = -EINVAL;
@@ -893,7 +893,7 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n,
 
 		/* now do the delete */
 		tcf_action_destroy(head, 0);
-		ret = rtnetlink_send(skb, net, pid, RTNLGRP_TC,
+		ret = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
 				     n->nlmsg_flags & NLM_F_ECHO);
 		if (ret > 0)
 			return 0;
@@ -905,7 +905,7 @@ err:
 }
 
 static int tcf_add_notify(struct net *net, struct tc_action *a,
-			  u32 pid, u32 seq, int event, u16 flags)
+			  u32 portid, u32 seq, int event, u16 flags)
 {
 	struct tcamsg *t;
 	struct nlmsghdr *nlh;
@@ -920,7 +920,7 @@ static int tcf_add_notify(struct net *net, struct tc_action *a,
 
 	b = skb_tail_pointer(skb);
 
-	nlh = nlmsg_put(skb, pid, seq, event, sizeof(*t), flags);
+	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*t), flags);
 	if (!nlh)
 		goto out_kfree_skb;
 	t = nlmsg_data(nlh);
@@ -940,7 +940,7 @@ static int tcf_add_notify(struct net *net, struct tc_action *a,
 	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
 	NETLINK_CB(skb).dst_group = RTNLGRP_TC;
 
-	err = rtnetlink_send(skb, net, pid, RTNLGRP_TC, flags & NLM_F_ECHO);
+	err = rtnetlink_send(skb, net, portid, RTNLGRP_TC, flags & NLM_F_ECHO);
 	if (err > 0)
 		err = 0;
 	return err;
@@ -953,7 +953,7 @@ out_kfree_skb:
 
 static int
 tcf_action_add(struct net *net, struct nlattr *nla, struct nlmsghdr *n,
-	       u32 pid, int ovr)
+	       u32 portid, int ovr)
 {
 	int ret = 0;
 	struct tc_action *act;
@@ -971,7 +971,7 @@ tcf_action_add(struct net *net, struct nlattr *nla, struct nlmsghdr *n,
 	/* dump then free all the actions after update; inserted policy
 	 * stays intact
 	 */
-	ret = tcf_add_notify(net, act, pid, seq, RTM_NEWACTION, n->nlmsg_flags);
+	ret = tcf_add_notify(net, act, portid, seq, RTM_NEWACTION, n->nlmsg_flags);
 	for (a = act; a; a = act) {
 		act = a->next;
 		kfree(a);
@@ -984,7 +984,7 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
 {
 	struct net *net = sock_net(skb->sk);
 	struct nlattr *tca[TCA_ACT_MAX + 1];
-	u32 pid = skb ? NETLINK_CB(skb).pid : 0;
+	u32 portid = skb ? NETLINK_CB(skb).portid : 0;
 	int ret = 0, ovr = 0;
 
 	ret = nlmsg_parse(n, sizeof(struct tcamsg), tca, TCA_ACT_MAX, NULL);
@@ -1008,17 +1008,17 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
 		if (n->nlmsg_flags & NLM_F_REPLACE)
 			ovr = 1;
 replay:
-		ret = tcf_action_add(net, tca[TCA_ACT_TAB], n, pid, ovr);
+		ret = tcf_action_add(net, tca[TCA_ACT_TAB], n, portid, ovr);
 		if (ret == -EAGAIN)
 			goto replay;
 		break;
 	case RTM_DELACTION:
 		ret = tca_action_gd(net, tca[TCA_ACT_TAB], n,
-				    pid, RTM_DELACTION);
+				    portid, RTM_DELACTION);
 		break;
 	case RTM_GETACTION:
 		ret = tca_action_gd(net, tca[TCA_ACT_TAB], n,
-				    pid, RTM_GETACTION);
+				    portid, RTM_GETACTION);
 		break;
 	default:
 		BUG();
@@ -1085,7 +1085,7 @@ tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
 		goto out_module_put;
 	}
 
-	nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
+	nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
 			cb->nlh->nlmsg_type, sizeof(*t), 0);
 	if (!nlh)
 		goto out_module_put;
@@ -1109,7 +1109,7 @@ tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
 		nla_nest_cancel(skb, nest);
 
 	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
-	if (NETLINK_CB(cb->skb).pid && ret)
+	if (NETLINK_CB(cb->skb).portid && ret)
 		nlh->nlmsg_flags |= NLM_F_MULTI;
 	module_put(a_o->owner);
 	return skb->len;
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index dc3ef5aef355..7ae02892437c 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -343,13 +343,13 @@ errout:
 }
 
 static int tcf_fill_node(struct sk_buff *skb, struct tcf_proto *tp,
-			 unsigned long fh, u32 pid, u32 seq, u16 flags, int event)
+			 unsigned long fh, u32 portid, u32 seq, u16 flags, int event)
 {
 	struct tcmsg *tcm;
 	struct nlmsghdr  *nlh;
 	unsigned char *b = skb_tail_pointer(skb);
 
-	nlh = nlmsg_put(skb, pid, seq, event, sizeof(*tcm), flags);
+	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
 	if (!nlh)
 		goto out_nlmsg_trim;
 	tcm = nlmsg_data(nlh);
@@ -381,18 +381,18 @@ static int tfilter_notify(struct net *net, struct sk_buff *oskb,
 			  unsigned long fh, int event)
 {
 	struct sk_buff *skb;
-	u32 pid = oskb ? NETLINK_CB(oskb).pid : 0;
+	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
 
 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
 	if (!skb)
 		return -ENOBUFS;
 
-	if (tcf_fill_node(skb, tp, fh, pid, n->nlmsg_seq, 0, event) <= 0) {
+	if (tcf_fill_node(skb, tp, fh, portid, n->nlmsg_seq, 0, event) <= 0) {
 		kfree_skb(skb);
 		return -EINVAL;
 	}
 
-	return rtnetlink_send(skb, net, pid, RTNLGRP_TC,
+	return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
 			      n->nlmsg_flags & NLM_F_ECHO);
 }
 
@@ -407,7 +407,7 @@ static int tcf_node_dump(struct tcf_proto *tp, unsigned long n,
 {
 	struct tcf_dump_args *a = (void *)arg;
 
-	return tcf_fill_node(a->skb, tp, n, NETLINK_CB(a->cb->skb).pid,
+	return tcf_fill_node(a->skb, tp, n, NETLINK_CB(a->cb->skb).portid,
 			     a->cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTFILTER);
 }
 
@@ -465,7 +465,7 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
 		if (t > s_t)
 			memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
 		if (cb->args[1] == 0) {
-			if (tcf_fill_node(skb, tp, 0, NETLINK_CB(cb->skb).pid,
+			if (tcf_fill_node(skb, tp, 0, NETLINK_CB(cb->skb).portid,
 					  cb->nlh->nlmsg_seq, NLM_F_MULTI,
 					  RTM_NEWTFILTER) <= 0)
 				break;
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index a08b4ab3e421..a18d975db59c 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -1185,7 +1185,7 @@ graft:
 }
 
 static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
-			 u32 pid, u32 seq, u16 flags, int event)
+			 u32 portid, u32 seq, u16 flags, int event)
 {
 	struct tcmsg *tcm;
 	struct nlmsghdr  *nlh;
@@ -1193,7 +1193,7 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
 	struct gnet_dump d;
 	struct qdisc_size_table *stab;
 
-	nlh = nlmsg_put(skb, pid, seq, event, sizeof(*tcm), flags);
+	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
 	if (!nlh)
 		goto out_nlmsg_trim;
 	tcm = nlmsg_data(nlh);
@@ -1248,25 +1248,25 @@ static int qdisc_notify(struct net *net, struct sk_buff *oskb,
 			struct Qdisc *old, struct Qdisc *new)
 {
 	struct sk_buff *skb;
-	u32 pid = oskb ? NETLINK_CB(oskb).pid : 0;
+	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
 
 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
 	if (!skb)
 		return -ENOBUFS;
 
 	if (old && !tc_qdisc_dump_ignore(old)) {
-		if (tc_fill_qdisc(skb, old, clid, pid, n->nlmsg_seq,
+		if (tc_fill_qdisc(skb, old, clid, portid, n->nlmsg_seq,
 				  0, RTM_DELQDISC) < 0)
 			goto err_out;
 	}
 	if (new && !tc_qdisc_dump_ignore(new)) {
-		if (tc_fill_qdisc(skb, new, clid, pid, n->nlmsg_seq,
+		if (tc_fill_qdisc(skb, new, clid, portid, n->nlmsg_seq,
 				  old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0)
 			goto err_out;
 	}
 
 	if (skb->len)
-		return rtnetlink_send(skb, net, pid, RTNLGRP_TC,
+		return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
 				      n->nlmsg_flags & NLM_F_ECHO);
 
 err_out:
@@ -1289,7 +1289,7 @@ static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
 		q_idx++;
 	} else {
 		if (!tc_qdisc_dump_ignore(q) &&
-		    tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid,
+		    tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
 				  cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0)
 			goto done;
 		q_idx++;
@@ -1300,7 +1300,7 @@ static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
 			continue;
 		}
 		if (!tc_qdisc_dump_ignore(q) &&
-		    tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid,
+		    tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
 				  cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0)
 			goto done;
 		q_idx++;
@@ -1375,7 +1375,7 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
 	const struct Qdisc_class_ops *cops;
 	unsigned long cl = 0;
 	unsigned long new_cl;
-	u32 pid = tcm->tcm_parent;
+	u32 portid = tcm->tcm_parent;
 	u32 clid = tcm->tcm_handle;
 	u32 qid = TC_H_MAJ(clid);
 	int err;
@@ -1403,8 +1403,8 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
 
 	/* Step 1. Determine qdisc handle X:0 */
 
-	if (pid != TC_H_ROOT) {
-		u32 qid1 = TC_H_MAJ(pid);
+	if (portid != TC_H_ROOT) {
+		u32 qid1 = TC_H_MAJ(portid);
 
 		if (qid && qid1) {
 			/* If both majors are known, they must be identical. */
@@ -1418,10 +1418,10 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
 		/* Now qid is genuine qdisc handle consistent
 		 * both with parent and child.
 		 *
-		 * TC_H_MAJ(pid) still may be unspecified, complete it now.
+		 * TC_H_MAJ(portid) still may be unspecified, complete it now.
 		 */
-		if (pid)
-			pid = TC_H_MAKE(qid, pid);
+		if (portid)
+			portid = TC_H_MAKE(qid, portid);
 	} else {
 		if (qid == 0)
 			qid = dev->qdisc->handle;
@@ -1439,7 +1439,7 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
 
 	/* Now try to get class */
 	if (clid == 0) {
-		if (pid == TC_H_ROOT)
+		if (portid == TC_H_ROOT)
 			clid = qid;
 	} else
 		clid = TC_H_MAKE(qid, clid);
@@ -1478,7 +1478,7 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
 	new_cl = cl;
 	err = -EOPNOTSUPP;
 	if (cops->change)
-		err = cops->change(q, clid, pid, tca, &new_cl);
+		err = cops->change(q, clid, portid, tca, &new_cl);
 	if (err == 0)
 		tclass_notify(net, skb, n, q, new_cl, RTM_NEWTCLASS);
 
@@ -1492,7 +1492,7 @@ out:
 
 static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
 			  unsigned long cl,
-			  u32 pid, u32 seq, u16 flags, int event)
+			  u32 portid, u32 seq, u16 flags, int event)
 {
 	struct tcmsg *tcm;
 	struct nlmsghdr  *nlh;
@@ -1500,7 +1500,7 @@ static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
 	struct gnet_dump d;
 	const struct Qdisc_class_ops *cl_ops = q->ops->cl_ops;
 
-	nlh = nlmsg_put(skb, pid, seq, event, sizeof(*tcm), flags);
+	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
 	if (!nlh)
 		goto out_nlmsg_trim;
 	tcm = nlmsg_data(nlh);
@@ -1540,18 +1540,18 @@ static int tclass_notify(struct net *net, struct sk_buff *oskb,
 			 unsigned long cl, int event)
 {
 	struct sk_buff *skb;
-	u32 pid = oskb ? NETLINK_CB(oskb).pid : 0;
+	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
 
 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
 	if (!skb)
 		return -ENOBUFS;
 
-	if (tc_fill_tclass(skb, q, cl, pid, n->nlmsg_seq, 0, event) < 0) {
+	if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0, event) < 0) {
 		kfree_skb(skb);
 		return -EINVAL;
 	}
 
-	return rtnetlink_send(skb, net, pid, RTNLGRP_TC,
+	return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
 			      n->nlmsg_flags & NLM_F_ECHO);
 }
 
@@ -1565,7 +1565,7 @@ static int qdisc_class_dump(struct Qdisc *q, unsigned long cl, struct qdisc_walk
 {
 	struct qdisc_dump_args *a = (struct qdisc_dump_args *)arg;
 
-	return tc_fill_tclass(a->skb, q, cl, NETLINK_CB(a->cb->skb).pid,
+	return tc_fill_tclass(a->skb, q, cl, NETLINK_CB(a->cb->skb).portid,
 			      a->cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTCLASS);
 }
 
diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c
index 47a839df27dc..6675914dc592 100644
--- a/net/tipc/netlink.c
+++ b/net/tipc/netlink.c
@@ -62,7 +62,7 @@ static int handle_cmd(struct sk_buff *skb, struct genl_info *info)
 		rep_nlh = nlmsg_hdr(rep_buf);
 		memcpy(rep_nlh, req_nlh, hdr_space);
 		rep_nlh->nlmsg_len = rep_buf->len;
-		genlmsg_unicast(&init_net, rep_buf, NETLINK_CB(skb).pid);
+		genlmsg_unicast(&init_net, rep_buf, NETLINK_CB(skb).portid);
 	}
 
 	return 0;
diff --git a/net/unix/diag.c b/net/unix/diag.c
index 750b13408449..06748f108a57 100644
--- a/net/unix/diag.c
+++ b/net/unix/diag.c
@@ -110,12 +110,12 @@ static int sk_diag_show_rqlen(struct sock *sk, struct sk_buff *nlskb)
 }
 
 static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct unix_diag_req *req,
-		u32 pid, u32 seq, u32 flags, int sk_ino)
+		u32 portid, u32 seq, u32 flags, int sk_ino)
 {
 	struct nlmsghdr *nlh;
 	struct unix_diag_msg *rep;
 
-	nlh = nlmsg_put(skb, pid, seq, SOCK_DIAG_BY_FAMILY, sizeof(*rep),
+	nlh = nlmsg_put(skb, portid, seq, SOCK_DIAG_BY_FAMILY, sizeof(*rep),
 			flags);
 	if (!nlh)
 		return -EMSGSIZE;
@@ -159,7 +159,7 @@ out_nlmsg_trim:
 }
 
 static int sk_diag_dump(struct sock *sk, struct sk_buff *skb, struct unix_diag_req *req,
-		u32 pid, u32 seq, u32 flags)
+		u32 portid, u32 seq, u32 flags)
 {
 	int sk_ino;
 
@@ -170,7 +170,7 @@ static int sk_diag_dump(struct sock *sk, struct sk_buff *skb, struct unix_diag_r
 	if (!sk_ino)
 		return 0;
 
-	return sk_diag_fill(sk, skb, req, pid, seq, flags, sk_ino);
+	return sk_diag_fill(sk, skb, req, portid, seq, flags, sk_ino);
 }
 
 static int unix_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
@@ -200,7 +200,7 @@ static int unix_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
 			if (!(req->udiag_states & (1 << sk->sk_state)))
 				goto next;
 			if (sk_diag_dump(sk, skb, req,
-					 NETLINK_CB(cb->skb).pid,
+					 NETLINK_CB(cb->skb).portid,
 					 cb->nlh->nlmsg_seq,
 					 NLM_F_MULTI) < 0)
 				goto done;
@@ -267,7 +267,7 @@ again:
 	if (!rep)
 		goto out;
 
-	err = sk_diag_fill(sk, rep, req, NETLINK_CB(in_skb).pid,
+	err = sk_diag_fill(sk, rep, req, NETLINK_CB(in_skb).portid,
 			   nlh->nlmsg_seq, 0, req->udiag_ino);
 	if (err < 0) {
 		nlmsg_free(rep);
@@ -277,7 +277,7 @@ again:
 
 		goto again;
 	}
-	err = netlink_unicast(net->diag_nlsk, rep, NETLINK_CB(in_skb).pid,
+	err = netlink_unicast(net->diag_nlsk, rep, NETLINK_CB(in_skb).portid,
 			      MSG_DONTWAIT);
 	if (err > 0)
 		err = 0;
diff --git a/net/wireless/core.h b/net/wireless/core.h
index bc7430b54771..a343be4a52bd 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -55,7 +55,7 @@ struct cfg80211_registered_device {
 	int opencount; /* also protected by devlist_mtx */
 	wait_queue_head_t dev_wait;
 
-	u32 ap_beacons_nlpid;
+	u32 ap_beacons_nlportid;
 
 	/* protected by RTNL only */
 	int num_running_ifaces;
diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c
index 8fd0242ee169..ec7fcee5bad6 100644
--- a/net/wireless/mlme.c
+++ b/net/wireless/mlme.c
@@ -615,7 +615,7 @@ EXPORT_SYMBOL(cfg80211_del_sta);
 struct cfg80211_mgmt_registration {
 	struct list_head list;
 
-	u32 nlpid;
+	u32 nlportid;
 
 	int match_len;
 
@@ -624,7 +624,7 @@ struct cfg80211_mgmt_registration {
 	u8 match[];
 };
 
-int cfg80211_mlme_register_mgmt(struct wireless_dev *wdev, u32 snd_pid,
+int cfg80211_mlme_register_mgmt(struct wireless_dev *wdev, u32 snd_portid,
 				u16 frame_type, const u8 *match_data,
 				int match_len)
 {
@@ -672,7 +672,7 @@ int cfg80211_mlme_register_mgmt(struct wireless_dev *wdev, u32 snd_pid,
 
 	memcpy(nreg->match, match_data, match_len);
 	nreg->match_len = match_len;
-	nreg->nlpid = snd_pid;
+	nreg->nlportid = snd_portid;
 	nreg->frame_type = cpu_to_le16(frame_type);
 	list_add(&nreg->list, &wdev->mgmt_registrations);
 
@@ -685,7 +685,7 @@ int cfg80211_mlme_register_mgmt(struct wireless_dev *wdev, u32 snd_pid,
 	return err;
 }
 
-void cfg80211_mlme_unregister_socket(struct wireless_dev *wdev, u32 nlpid)
+void cfg80211_mlme_unregister_socket(struct wireless_dev *wdev, u32 nlportid)
 {
 	struct wiphy *wiphy = wdev->wiphy;
 	struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
@@ -694,7 +694,7 @@ void cfg80211_mlme_unregister_socket(struct wireless_dev *wdev, u32 nlpid)
 	spin_lock_bh(&wdev->mgmt_registrations_lock);
 
 	list_for_each_entry_safe(reg, tmp, &wdev->mgmt_registrations, list) {
-		if (reg->nlpid != nlpid)
+		if (reg->nlportid != nlportid)
 			continue;
 
 		if (rdev->ops->mgmt_frame_register) {
@@ -710,8 +710,8 @@ void cfg80211_mlme_unregister_socket(struct wireless_dev *wdev, u32 nlpid)
 
 	spin_unlock_bh(&wdev->mgmt_registrations_lock);
 
-	if (nlpid == wdev->ap_unexpected_nlpid)
-		wdev->ap_unexpected_nlpid = 0;
+	if (nlportid == wdev->ap_unexpected_nlportid)
+		wdev->ap_unexpected_nlportid = 0;
 }
 
 void cfg80211_mlme_purge_registrations(struct wireless_dev *wdev)
@@ -872,7 +872,7 @@ bool cfg80211_rx_mgmt(struct wireless_dev *wdev, int freq, int sig_mbm,
 		/* found match! */
 
 		/* Indicate the received Action frame to user space */
-		if (nl80211_send_mgmt(rdev, wdev, reg->nlpid,
+		if (nl80211_send_mgmt(rdev, wdev, reg->nlportid,
 				      freq, sig_mbm,
 				      buf, len, gfp))
 			continue;
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 787aeaa902fe..34eb5c07a567 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -496,11 +496,11 @@ static bool is_valid_ie_attr(const struct nlattr *attr)
 }
 
 /* message building helper */
-static inline void *nl80211hdr_put(struct sk_buff *skb, u32 pid, u32 seq,
+static inline void *nl80211hdr_put(struct sk_buff *skb, u32 portid, u32 seq,
 				   int flags, u8 cmd)
 {
 	/* since there is no private header just add the generic one */
-	return genlmsg_put(skb, pid, seq, &nl80211_fam, flags, cmd);
+	return genlmsg_put(skb, portid, seq, &nl80211_fam, flags, cmd);
 }
 
 static int nl80211_msg_put_channel(struct sk_buff *msg,
@@ -851,7 +851,7 @@ nla_put_failure:
 	return -ENOBUFS;
 }
 
-static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags,
+static int nl80211_send_wiphy(struct sk_buff *msg, u32 portid, u32 seq, int flags,
 			      struct cfg80211_registered_device *dev)
 {
 	void *hdr;
@@ -866,7 +866,7 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags,
 	const struct ieee80211_txrx_stypes *mgmt_stypes =
 				dev->wiphy.mgmt_stypes;
 
-	hdr = nl80211hdr_put(msg, pid, seq, flags, NL80211_CMD_NEW_WIPHY);
+	hdr = nl80211hdr_put(msg, portid, seq, flags, NL80211_CMD_NEW_WIPHY);
 	if (!hdr)
 		return -1;
 
@@ -1267,7 +1267,7 @@ static int nl80211_dump_wiphy(struct sk_buff *skb, struct netlink_callback *cb)
 			continue;
 		if (++idx <= start)
 			continue;
-		if (nl80211_send_wiphy(skb, NETLINK_CB(cb->skb).pid,
+		if (nl80211_send_wiphy(skb, NETLINK_CB(cb->skb).portid,
 				       cb->nlh->nlmsg_seq, NLM_F_MULTI,
 				       dev) < 0) {
 			idx--;
@@ -1290,7 +1290,7 @@ static int nl80211_get_wiphy(struct sk_buff *skb, struct genl_info *info)
 	if (!msg)
 		return -ENOMEM;
 
-	if (nl80211_send_wiphy(msg, info->snd_pid, info->snd_seq, 0, dev) < 0) {
+	if (nl80211_send_wiphy(msg, info->snd_portid, info->snd_seq, 0, dev) < 0) {
 		nlmsg_free(msg);
 		return -ENOBUFS;
 	}
@@ -1736,14 +1736,14 @@ static inline u64 wdev_id(struct wireless_dev *wdev)
 	       ((u64)wiphy_to_dev(wdev->wiphy)->wiphy_idx << 32);
 }
 
-static int nl80211_send_iface(struct sk_buff *msg, u32 pid, u32 seq, int flags,
+static int nl80211_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flags,
 			      struct cfg80211_registered_device *rdev,
 			      struct wireless_dev *wdev)
 {
 	struct net_device *dev = wdev->netdev;
 	void *hdr;
 
-	hdr = nl80211hdr_put(msg, pid, seq, flags, NL80211_CMD_NEW_INTERFACE);
+	hdr = nl80211hdr_put(msg, portid, seq, flags, NL80211_CMD_NEW_INTERFACE);
 	if (!hdr)
 		return -1;
 
@@ -1807,7 +1807,7 @@ static int nl80211_dump_interface(struct sk_buff *skb, struct netlink_callback *
 				if_idx++;
 				continue;
 			}
-			if (nl80211_send_iface(skb, NETLINK_CB(cb->skb).pid,
+			if (nl80211_send_iface(skb, NETLINK_CB(cb->skb).portid,
 					       cb->nlh->nlmsg_seq, NLM_F_MULTI,
 					       rdev, wdev) < 0) {
 				mutex_unlock(&rdev->devlist_mtx);
@@ -1838,7 +1838,7 @@ static int nl80211_get_interface(struct sk_buff *skb, struct genl_info *info)
 	if (!msg)
 		return -ENOMEM;
 
-	if (nl80211_send_iface(msg, info->snd_pid, info->snd_seq, 0,
+	if (nl80211_send_iface(msg, info->snd_portid, info->snd_seq, 0,
 			       dev, wdev) < 0) {
 		nlmsg_free(msg);
 		return -ENOBUFS;
@@ -2056,7 +2056,7 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info)
 		break;
 	}
 
-	if (nl80211_send_iface(msg, info->snd_pid, info->snd_seq, 0,
+	if (nl80211_send_iface(msg, info->snd_portid, info->snd_seq, 0,
 			       rdev, wdev) < 0) {
 		nlmsg_free(msg);
 		return -ENOBUFS;
@@ -2191,7 +2191,7 @@ static int nl80211_get_key(struct sk_buff *skb, struct genl_info *info)
 	if (!msg)
 		return -ENOMEM;
 
-	hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0,
+	hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0,
 			     NL80211_CMD_NEW_KEY);
 	if (IS_ERR(hdr))
 		return PTR_ERR(hdr);
@@ -2769,7 +2769,7 @@ nla_put_failure:
 	return false;
 }
 
-static int nl80211_send_station(struct sk_buff *msg, u32 pid, u32 seq,
+static int nl80211_send_station(struct sk_buff *msg, u32 portid, u32 seq,
 				int flags,
 				struct cfg80211_registered_device *rdev,
 				struct net_device *dev,
@@ -2778,7 +2778,7 @@ static int nl80211_send_station(struct sk_buff *msg, u32 pid, u32 seq,
 	void *hdr;
 	struct nlattr *sinfoattr, *bss_param;
 
-	hdr = nl80211hdr_put(msg, pid, seq, flags, NL80211_CMD_NEW_STATION);
+	hdr = nl80211hdr_put(msg, portid, seq, flags, NL80211_CMD_NEW_STATION);
 	if (!hdr)
 		return -1;
 
@@ -2931,7 +2931,7 @@ static int nl80211_dump_station(struct sk_buff *skb,
 			goto out_err;
 
 		if (nl80211_send_station(skb,
-				NETLINK_CB(cb->skb).pid,
+				NETLINK_CB(cb->skb).portid,
 				cb->nlh->nlmsg_seq, NLM_F_MULTI,
 				dev, netdev, mac_addr,
 				&sinfo) < 0)
@@ -2977,7 +2977,7 @@ static int nl80211_get_station(struct sk_buff *skb, struct genl_info *info)
 	if (!msg)
 		return -ENOMEM;
 
-	if (nl80211_send_station(msg, info->snd_pid, info->snd_seq, 0,
+	if (nl80211_send_station(msg, info->snd_portid, info->snd_seq, 0,
 				 rdev, dev, mac_addr, &sinfo) < 0) {
 		nlmsg_free(msg);
 		return -ENOBUFS;
@@ -3303,7 +3303,7 @@ static int nl80211_del_station(struct sk_buff *skb, struct genl_info *info)
 	return rdev->ops->del_station(&rdev->wiphy, dev, mac_addr);
 }
 
-static int nl80211_send_mpath(struct sk_buff *msg, u32 pid, u32 seq,
+static int nl80211_send_mpath(struct sk_buff *msg, u32 portid, u32 seq,
 				int flags, struct net_device *dev,
 				u8 *dst, u8 *next_hop,
 				struct mpath_info *pinfo)
@@ -3311,7 +3311,7 @@ static int nl80211_send_mpath(struct sk_buff *msg, u32 pid, u32 seq,
 	void *hdr;
 	struct nlattr *pinfoattr;
 
-	hdr = nl80211hdr_put(msg, pid, seq, flags, NL80211_CMD_NEW_STATION);
+	hdr = nl80211hdr_put(msg, portid, seq, flags, NL80211_CMD_NEW_STATION);
 	if (!hdr)
 		return -1;
 
@@ -3389,7 +3389,7 @@ static int nl80211_dump_mpath(struct sk_buff *skb,
 		if (err)
 			goto out_err;
 
-		if (nl80211_send_mpath(skb, NETLINK_CB(cb->skb).pid,
+		if (nl80211_send_mpath(skb, NETLINK_CB(cb->skb).portid,
 				       cb->nlh->nlmsg_seq, NLM_F_MULTI,
 				       netdev, dst, next_hop,
 				       &pinfo) < 0)
@@ -3438,7 +3438,7 @@ static int nl80211_get_mpath(struct sk_buff *skb, struct genl_info *info)
 	if (!msg)
 		return -ENOMEM;
 
-	if (nl80211_send_mpath(msg, info->snd_pid, info->snd_seq, 0,
+	if (nl80211_send_mpath(msg, info->snd_portid, info->snd_seq, 0,
 				 dev, dst, next_hop, &pinfo) < 0) {
 		nlmsg_free(msg);
 		return -ENOBUFS;
@@ -3679,7 +3679,7 @@ static int nl80211_get_mesh_config(struct sk_buff *skb,
 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
 	if (!msg)
 		return -ENOMEM;
-	hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0,
+	hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0,
 			     NL80211_CMD_GET_MESH_CONFIG);
 	if (!hdr)
 		goto out;
@@ -3998,7 +3998,7 @@ static int nl80211_get_reg(struct sk_buff *skb, struct genl_info *info)
 		goto out;
 	}
 
-	hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0,
+	hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0,
 			     NL80211_CMD_GET_REG);
 	if (!hdr)
 		goto put_failure;
@@ -4616,7 +4616,7 @@ static int nl80211_send_bss(struct sk_buff *msg, struct netlink_callback *cb,
 
 	ASSERT_WDEV_LOCK(wdev);
 
-	hdr = nl80211hdr_put(msg, NETLINK_CB(cb->skb).pid, seq, flags,
+	hdr = nl80211hdr_put(msg, NETLINK_CB(cb->skb).portid, seq, flags,
 			     NL80211_CMD_NEW_SCAN_RESULTS);
 	if (!hdr)
 		return -1;
@@ -4735,14 +4735,14 @@ static int nl80211_dump_scan(struct sk_buff *skb,
 	return skb->len;
 }
 
-static int nl80211_send_survey(struct sk_buff *msg, u32 pid, u32 seq,
+static int nl80211_send_survey(struct sk_buff *msg, u32 portid, u32 seq,
 				int flags, struct net_device *dev,
 				struct survey_info *survey)
 {
 	void *hdr;
 	struct nlattr *infoattr;
 
-	hdr = nl80211hdr_put(msg, pid, seq, flags,
+	hdr = nl80211hdr_put(msg, portid, seq, flags,
 			     NL80211_CMD_NEW_SURVEY_RESULTS);
 	if (!hdr)
 		return -ENOMEM;
@@ -4836,7 +4836,7 @@ static int nl80211_dump_survey(struct sk_buff *skb,
 		}
 
 		if (nl80211_send_survey(skb,
-				NETLINK_CB(cb->skb).pid,
+				NETLINK_CB(cb->skb).portid,
 				cb->nlh->nlmsg_seq, NLM_F_MULTI,
 				netdev,
 				&survey) < 0)
@@ -5451,7 +5451,7 @@ static int nl80211_testmode_dump(struct sk_buff *skb,
 	}
 
 	while (1) {
-		void *hdr = nl80211hdr_put(skb, NETLINK_CB(cb->skb).pid,
+		void *hdr = nl80211hdr_put(skb, NETLINK_CB(cb->skb).portid,
 					   cb->nlh->nlmsg_seq, NLM_F_MULTI,
 					   NL80211_CMD_TESTMODE);
 		struct nlattr *tmdata;
@@ -5491,7 +5491,7 @@ static int nl80211_testmode_dump(struct sk_buff *skb,
 
 static struct sk_buff *
 __cfg80211_testmode_alloc_skb(struct cfg80211_registered_device *rdev,
-			      int approxlen, u32 pid, u32 seq, gfp_t gfp)
+			      int approxlen, u32 portid, u32 seq, gfp_t gfp)
 {
 	struct sk_buff *skb;
 	void *hdr;
@@ -5501,7 +5501,7 @@ __cfg80211_testmode_alloc_skb(struct cfg80211_registered_device *rdev,
 	if (!skb)
 		return NULL;
 
-	hdr = nl80211hdr_put(skb, pid, seq, 0, NL80211_CMD_TESTMODE);
+	hdr = nl80211hdr_put(skb, portid, seq, 0, NL80211_CMD_TESTMODE);
 	if (!hdr) {
 		kfree_skb(skb);
 		return NULL;
@@ -5531,7 +5531,7 @@ struct sk_buff *cfg80211_testmode_alloc_reply_skb(struct wiphy *wiphy,
 		return NULL;
 
 	return __cfg80211_testmode_alloc_skb(rdev, approxlen,
-				rdev->testmode_info->snd_pid,
+				rdev->testmode_info->snd_portid,
 				rdev->testmode_info->snd_seq,
 				GFP_KERNEL);
 }
@@ -5867,7 +5867,7 @@ static int nl80211_remain_on_channel(struct sk_buff *skb,
 	if (!msg)
 		return -ENOMEM;
 
-	hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0,
+	hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0,
 			     NL80211_CMD_REMAIN_ON_CHANNEL);
 
 	if (IS_ERR(hdr)) {
@@ -6086,7 +6086,7 @@ static int nl80211_register_mgmt(struct sk_buff *skb, struct genl_info *info)
 	if (!rdev->ops->mgmt_tx)
 		return -EOPNOTSUPP;
 
-	return cfg80211_mlme_register_mgmt(wdev, info->snd_pid, frame_type,
+	return cfg80211_mlme_register_mgmt(wdev, info->snd_portid, frame_type,
 			nla_data(info->attrs[NL80211_ATTR_FRAME_MATCH]),
 			nla_len(info->attrs[NL80211_ATTR_FRAME_MATCH]));
 }
@@ -6167,7 +6167,7 @@ static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info)
 		if (!msg)
 			return -ENOMEM;
 
-		hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0,
+		hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0,
 				     NL80211_CMD_FRAME);
 
 		if (IS_ERR(hdr)) {
@@ -6284,7 +6284,7 @@ static int nl80211_get_power_save(struct sk_buff *skb, struct genl_info *info)
 	if (!msg)
 		return -ENOMEM;
 
-	hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0,
+	hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0,
 			     NL80211_CMD_GET_POWER_SAVE);
 	if (!hdr) {
 		err = -ENOBUFS;
@@ -6486,7 +6486,7 @@ static int nl80211_get_wowlan(struct sk_buff *skb, struct genl_info *info)
 	if (!msg)
 		return -ENOMEM;
 
-	hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0,
+	hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0,
 			     NL80211_CMD_GET_WOWLAN);
 	if (!hdr)
 		goto nla_put_failure;
@@ -6760,10 +6760,10 @@ static int nl80211_register_unexpected_frame(struct sk_buff *skb,
 	    wdev->iftype != NL80211_IFTYPE_P2P_GO)
 		return -EINVAL;
 
-	if (wdev->ap_unexpected_nlpid)
+	if (wdev->ap_unexpected_nlportid)
 		return -EBUSY;
 
-	wdev->ap_unexpected_nlpid = info->snd_pid;
+	wdev->ap_unexpected_nlportid = info->snd_portid;
 	return 0;
 }
 
@@ -6793,7 +6793,7 @@ static int nl80211_probe_client(struct sk_buff *skb,
 	if (!msg)
 		return -ENOMEM;
 
-	hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0,
+	hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0,
 			     NL80211_CMD_PROBE_CLIENT);
 
 	if (IS_ERR(hdr)) {
@@ -6828,10 +6828,10 @@ static int nl80211_register_beacons(struct sk_buff *skb, struct genl_info *info)
 	if (!(rdev->wiphy.flags & WIPHY_FLAG_REPORTS_OBSS))
 		return -EOPNOTSUPP;
 
-	if (rdev->ap_beacons_nlpid)
+	if (rdev->ap_beacons_nlportid)
 		return -EBUSY;
 
-	rdev->ap_beacons_nlpid = info->snd_pid;
+	rdev->ap_beacons_nlportid = info->snd_portid;
 
 	return 0;
 }
@@ -7628,12 +7628,12 @@ static int nl80211_add_scan_req(struct sk_buff *msg,
 static int nl80211_send_scan_msg(struct sk_buff *msg,
 				 struct cfg80211_registered_device *rdev,
 				 struct wireless_dev *wdev,
-				 u32 pid, u32 seq, int flags,
+				 u32 portid, u32 seq, int flags,
 				 u32 cmd)
 {
 	void *hdr;
 
-	hdr = nl80211hdr_put(msg, pid, seq, flags, cmd);
+	hdr = nl80211hdr_put(msg, portid, seq, flags, cmd);
 	if (!hdr)
 		return -1;
 
@@ -7657,11 +7657,11 @@ static int
 nl80211_send_sched_scan_msg(struct sk_buff *msg,
 			    struct cfg80211_registered_device *rdev,
 			    struct net_device *netdev,
-			    u32 pid, u32 seq, int flags, u32 cmd)
+			    u32 portid, u32 seq, int flags, u32 cmd)
 {
 	void *hdr;
 
-	hdr = nl80211hdr_put(msg, pid, seq, flags, cmd);
+	hdr = nl80211hdr_put(msg, portid, seq, flags, cmd);
 	if (!hdr)
 		return -1;
 
@@ -8370,9 +8370,9 @@ static bool __nl80211_unexpected_frame(struct net_device *dev, u8 cmd,
 	struct sk_buff *msg;
 	void *hdr;
 	int err;
-	u32 nlpid = ACCESS_ONCE(wdev->ap_unexpected_nlpid);
+	u32 nlportid = ACCESS_ONCE(wdev->ap_unexpected_nlportid);
 
-	if (!nlpid)
+	if (!nlportid)
 		return false;
 
 	msg = nlmsg_new(100, gfp);
@@ -8396,7 +8396,7 @@ static bool __nl80211_unexpected_frame(struct net_device *dev, u8 cmd,
 		return true;
 	}
 
-	genlmsg_unicast(wiphy_net(&rdev->wiphy), msg, nlpid);
+	genlmsg_unicast(wiphy_net(&rdev->wiphy), msg, nlportid);
 	return true;
 
  nla_put_failure:
@@ -8420,7 +8420,7 @@ bool nl80211_unexpected_4addr_frame(struct net_device *dev,
 }
 
 int nl80211_send_mgmt(struct cfg80211_registered_device *rdev,
-		      struct wireless_dev *wdev, u32 nlpid,
+		      struct wireless_dev *wdev, u32 nlportid,
 		      int freq, int sig_dbm,
 		      const u8 *buf, size_t len, gfp_t gfp)
 {
@@ -8449,7 +8449,7 @@ int nl80211_send_mgmt(struct cfg80211_registered_device *rdev,
 
 	genlmsg_end(msg, hdr);
 
-	return genlmsg_unicast(wiphy_net(&rdev->wiphy), msg, nlpid);
+	return genlmsg_unicast(wiphy_net(&rdev->wiphy), msg, nlportid);
 
  nla_put_failure:
 	genlmsg_cancel(msg, hdr);
@@ -8804,9 +8804,9 @@ void cfg80211_report_obss_beacon(struct wiphy *wiphy,
 	struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
 	struct sk_buff *msg;
 	void *hdr;
-	u32 nlpid = ACCESS_ONCE(rdev->ap_beacons_nlpid);
+	u32 nlportid = ACCESS_ONCE(rdev->ap_beacons_nlportid);
 
-	if (!nlpid)
+	if (!nlportid)
 		return;
 
 	msg = nlmsg_new(len + 100, gfp);
@@ -8829,7 +8829,7 @@ void cfg80211_report_obss_beacon(struct wiphy *wiphy,
 
 	genlmsg_end(msg, hdr);
 
-	genlmsg_unicast(wiphy_net(&rdev->wiphy), msg, nlpid);
+	genlmsg_unicast(wiphy_net(&rdev->wiphy), msg, nlportid);
 	return;
 
  nla_put_failure:
@@ -8853,9 +8853,9 @@ static int nl80211_netlink_notify(struct notifier_block * nb,
 
 	list_for_each_entry_rcu(rdev, &cfg80211_rdev_list, list) {
 		list_for_each_entry_rcu(wdev, &rdev->wdev_list, list)
-			cfg80211_mlme_unregister_socket(wdev, notify->pid);
-		if (rdev->ap_beacons_nlpid == notify->pid)
-			rdev->ap_beacons_nlpid = 0;
+			cfg80211_mlme_unregister_socket(wdev, notify->portid);
+		if (rdev->ap_beacons_nlportid == notify->portid)
+			rdev->ap_beacons_nlportid = 0;
 	}
 
 	rcu_read_unlock();
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 7856c33898fa..30edad44e7fc 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -166,7 +166,7 @@ static DEFINE_SPINLOCK(xfrm_state_gc_lock);
 int __xfrm_state_delete(struct xfrm_state *x);
 
 int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol);
-void km_state_expired(struct xfrm_state *x, int hard, u32 pid);
+void km_state_expired(struct xfrm_state *x, int hard, u32 portid);
 
 static struct xfrm_state_afinfo *xfrm_state_lock_afinfo(unsigned int family)
 {
@@ -1674,13 +1674,13 @@ void km_state_notify(struct xfrm_state *x, const struct km_event *c)
 EXPORT_SYMBOL(km_policy_notify);
 EXPORT_SYMBOL(km_state_notify);
 
-void km_state_expired(struct xfrm_state *x, int hard, u32 pid)
+void km_state_expired(struct xfrm_state *x, int hard, u32 portid)
 {
 	struct net *net = xs_net(x);
 	struct km_event c;
 
 	c.data.hard = hard;
-	c.pid = pid;
+	c.portid = portid;
 	c.event = XFRM_MSG_EXPIRE;
 	km_state_notify(x, &c);
 
@@ -1726,13 +1726,13 @@ int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, __be16 sport)
 }
 EXPORT_SYMBOL(km_new_mapping);
 
-void km_policy_expired(struct xfrm_policy *pol, int dir, int hard, u32 pid)
+void km_policy_expired(struct xfrm_policy *pol, int dir, int hard, u32 portid)
 {
 	struct net *net = xp_net(pol);
 	struct km_event c;
 
 	c.data.hard = hard;
-	c.pid = pid;
+	c.portid = portid;
 	c.event = XFRM_MSG_POLEXPIRE;
 	km_policy_notify(pol, dir, &c);
 
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 354070adb5ef..b313d932d678 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -603,7 +603,7 @@ static int xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh,
 	}
 
 	c.seq = nlh->nlmsg_seq;
-	c.pid = nlh->nlmsg_pid;
+	c.portid = nlh->nlmsg_pid;
 	c.event = nlh->nlmsg_type;
 
 	km_state_notify(x, &c);
@@ -676,7 +676,7 @@ static int xfrm_del_sa(struct sk_buff *skb, struct nlmsghdr *nlh,
 		goto out;
 
 	c.seq = nlh->nlmsg_seq;
-	c.pid = nlh->nlmsg_pid;
+	c.portid = nlh->nlmsg_pid;
 	c.event = nlh->nlmsg_type;
 	km_state_notify(x, &c);
 
@@ -826,7 +826,7 @@ static int dump_one_state(struct xfrm_state *x, int count, void *ptr)
 	struct nlmsghdr *nlh;
 	int err;
 
-	nlh = nlmsg_put(skb, NETLINK_CB(in_skb).pid, sp->nlmsg_seq,
+	nlh = nlmsg_put(skb, NETLINK_CB(in_skb).portid, sp->nlmsg_seq,
 			XFRM_MSG_NEWSA, sizeof(*p), sp->nlmsg_flags);
 	if (nlh == NULL)
 		return -EMSGSIZE;
@@ -904,7 +904,7 @@ static inline size_t xfrm_spdinfo_msgsize(void)
 }
 
 static int build_spdinfo(struct sk_buff *skb, struct net *net,
-			 u32 pid, u32 seq, u32 flags)
+			 u32 portid, u32 seq, u32 flags)
 {
 	struct xfrmk_spdinfo si;
 	struct xfrmu_spdinfo spc;
@@ -913,7 +913,7 @@ static int build_spdinfo(struct sk_buff *skb, struct net *net,
 	int err;
 	u32 *f;
 
-	nlh = nlmsg_put(skb, pid, seq, XFRM_MSG_NEWSPDINFO, sizeof(u32), 0);
+	nlh = nlmsg_put(skb, portid, seq, XFRM_MSG_NEWSPDINFO, sizeof(u32), 0);
 	if (nlh == NULL) /* shouldn't really happen ... */
 		return -EMSGSIZE;
 
@@ -946,17 +946,17 @@ static int xfrm_get_spdinfo(struct sk_buff *skb, struct nlmsghdr *nlh,
 	struct net *net = sock_net(skb->sk);
 	struct sk_buff *r_skb;
 	u32 *flags = nlmsg_data(nlh);
-	u32 spid = NETLINK_CB(skb).pid;
+	u32 sportid = NETLINK_CB(skb).portid;
 	u32 seq = nlh->nlmsg_seq;
 
 	r_skb = nlmsg_new(xfrm_spdinfo_msgsize(), GFP_ATOMIC);
 	if (r_skb == NULL)
 		return -ENOMEM;
 
-	if (build_spdinfo(r_skb, net, spid, seq, *flags) < 0)
+	if (build_spdinfo(r_skb, net, sportid, seq, *flags) < 0)
 		BUG();
 
-	return nlmsg_unicast(net->xfrm.nlsk, r_skb, spid);
+	return nlmsg_unicast(net->xfrm.nlsk, r_skb, sportid);
 }
 
 static inline size_t xfrm_sadinfo_msgsize(void)
@@ -967,7 +967,7 @@ static inline size_t xfrm_sadinfo_msgsize(void)
 }
 
 static int build_sadinfo(struct sk_buff *skb, struct net *net,
-			 u32 pid, u32 seq, u32 flags)
+			 u32 portid, u32 seq, u32 flags)
 {
 	struct xfrmk_sadinfo si;
 	struct xfrmu_sadhinfo sh;
@@ -975,7 +975,7 @@ static int build_sadinfo(struct sk_buff *skb, struct net *net,
 	int err;
 	u32 *f;
 
-	nlh = nlmsg_put(skb, pid, seq, XFRM_MSG_NEWSADINFO, sizeof(u32), 0);
+	nlh = nlmsg_put(skb, portid, seq, XFRM_MSG_NEWSADINFO, sizeof(u32), 0);
 	if (nlh == NULL) /* shouldn't really happen ... */
 		return -EMSGSIZE;
 
@@ -1003,17 +1003,17 @@ static int xfrm_get_sadinfo(struct sk_buff *skb, struct nlmsghdr *nlh,
 	struct net *net = sock_net(skb->sk);
 	struct sk_buff *r_skb;
 	u32 *flags = nlmsg_data(nlh);
-	u32 spid = NETLINK_CB(skb).pid;
+	u32 sportid = NETLINK_CB(skb).portid;
 	u32 seq = nlh->nlmsg_seq;
 
 	r_skb = nlmsg_new(xfrm_sadinfo_msgsize(), GFP_ATOMIC);
 	if (r_skb == NULL)
 		return -ENOMEM;
 
-	if (build_sadinfo(r_skb, net, spid, seq, *flags) < 0)
+	if (build_sadinfo(r_skb, net, sportid, seq, *flags) < 0)
 		BUG();
 
-	return nlmsg_unicast(net->xfrm.nlsk, r_skb, spid);
+	return nlmsg_unicast(net->xfrm.nlsk, r_skb, sportid);
 }
 
 static int xfrm_get_sa(struct sk_buff *skb, struct nlmsghdr *nlh,
@@ -1033,7 +1033,7 @@ static int xfrm_get_sa(struct sk_buff *skb, struct nlmsghdr *nlh,
 	if (IS_ERR(resp_skb)) {
 		err = PTR_ERR(resp_skb);
 	} else {
-		err = nlmsg_unicast(net->xfrm.nlsk, resp_skb, NETLINK_CB(skb).pid);
+		err = nlmsg_unicast(net->xfrm.nlsk, resp_skb, NETLINK_CB(skb).portid);
 	}
 	xfrm_state_put(x);
 out_noput:
@@ -1114,7 +1114,7 @@ static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh,
 		goto out;
 	}
 
-	err = nlmsg_unicast(net->xfrm.nlsk, resp_skb, NETLINK_CB(skb).pid);
+	err = nlmsg_unicast(net->xfrm.nlsk, resp_skb, NETLINK_CB(skb).portid);
 
 out:
 	xfrm_state_put(x);
@@ -1401,7 +1401,7 @@ static int xfrm_add_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
 
 	c.event = nlh->nlmsg_type;
 	c.seq = nlh->nlmsg_seq;
-	c.pid = nlh->nlmsg_pid;
+	c.portid = nlh->nlmsg_pid;
 	km_policy_notify(xp, p->dir, &c);
 
 	xfrm_pol_put(xp);
@@ -1486,7 +1486,7 @@ static int dump_one_policy(struct xfrm_policy *xp, int dir, int count, void *ptr
 	struct nlmsghdr *nlh;
 	int err;
 
-	nlh = nlmsg_put(skb, NETLINK_CB(in_skb).pid, sp->nlmsg_seq,
+	nlh = nlmsg_put(skb, NETLINK_CB(in_skb).portid, sp->nlmsg_seq,
 			XFRM_MSG_NEWPOLICY, sizeof(*p), sp->nlmsg_flags);
 	if (nlh == NULL)
 		return -EMSGSIZE;
@@ -1621,7 +1621,7 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
 			err = PTR_ERR(resp_skb);
 		} else {
 			err = nlmsg_unicast(net->xfrm.nlsk, resp_skb,
-					    NETLINK_CB(skb).pid);
+					    NETLINK_CB(skb).portid);
 		}
 	} else {
 		uid_t loginuid = audit_get_loginuid(current);
@@ -1638,7 +1638,7 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
 		c.data.byid = p->index;
 		c.event = nlh->nlmsg_type;
 		c.seq = nlh->nlmsg_seq;
-		c.pid = nlh->nlmsg_pid;
+		c.portid = nlh->nlmsg_pid;
 		km_policy_notify(xp, p->dir, &c);
 	}
 
@@ -1668,7 +1668,7 @@ static int xfrm_flush_sa(struct sk_buff *skb, struct nlmsghdr *nlh,
 	c.data.proto = p->proto;
 	c.event = nlh->nlmsg_type;
 	c.seq = nlh->nlmsg_seq;
-	c.pid = nlh->nlmsg_pid;
+	c.portid = nlh->nlmsg_pid;
 	c.net = net;
 	km_state_notify(NULL, &c);
 
@@ -1695,7 +1695,7 @@ static int build_aevent(struct sk_buff *skb, struct xfrm_state *x, const struct
 	struct nlmsghdr *nlh;
 	int err;
 
-	nlh = nlmsg_put(skb, c->pid, c->seq, XFRM_MSG_NEWAE, sizeof(*id), 0);
+	nlh = nlmsg_put(skb, c->portid, c->seq, XFRM_MSG_NEWAE, sizeof(*id), 0);
 	if (nlh == NULL)
 		return -EMSGSIZE;
 
@@ -1777,11 +1777,11 @@ static int xfrm_get_ae(struct sk_buff *skb, struct nlmsghdr *nlh,
 	spin_lock_bh(&x->lock);
 	c.data.aevent = p->flags;
 	c.seq = nlh->nlmsg_seq;
-	c.pid = nlh->nlmsg_pid;
+	c.portid = nlh->nlmsg_pid;
 
 	if (build_aevent(r_skb, x, &c) < 0)
 		BUG();
-	err = nlmsg_unicast(net->xfrm.nlsk, r_skb, NETLINK_CB(skb).pid);
+	err = nlmsg_unicast(net->xfrm.nlsk, r_skb, NETLINK_CB(skb).portid);
 	spin_unlock_bh(&x->lock);
 	xfrm_state_put(x);
 	return err;
@@ -1827,7 +1827,7 @@ static int xfrm_new_ae(struct sk_buff *skb, struct nlmsghdr *nlh,
 
 	c.event = nlh->nlmsg_type;
 	c.seq = nlh->nlmsg_seq;
-	c.pid = nlh->nlmsg_pid;
+	c.portid = nlh->nlmsg_pid;
 	c.data.aevent = XFRM_AE_CU;
 	km_state_notify(x, &c);
 	err = 0;
@@ -1862,7 +1862,7 @@ static int xfrm_flush_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
 	c.data.type = type;
 	c.event = nlh->nlmsg_type;
 	c.seq = nlh->nlmsg_seq;
-	c.pid = nlh->nlmsg_pid;
+	c.portid = nlh->nlmsg_pid;
 	c.net = net;
 	km_policy_notify(NULL, 0, &c);
 	return 0;
@@ -2370,7 +2370,7 @@ static int build_expire(struct sk_buff *skb, struct xfrm_state *x, const struct
 	struct nlmsghdr *nlh;
 	int err;
 
-	nlh = nlmsg_put(skb, c->pid, 0, XFRM_MSG_EXPIRE, sizeof(*ue), 0);
+	nlh = nlmsg_put(skb, c->portid, 0, XFRM_MSG_EXPIRE, sizeof(*ue), 0);
 	if (nlh == NULL)
 		return -EMSGSIZE;
 
@@ -2429,7 +2429,7 @@ static int xfrm_notify_sa_flush(const struct km_event *c)
 	if (skb == NULL)
 		return -ENOMEM;
 
-	nlh = nlmsg_put(skb, c->pid, c->seq, XFRM_MSG_FLUSHSA, sizeof(*p), 0);
+	nlh = nlmsg_put(skb, c->portid, c->seq, XFRM_MSG_FLUSHSA, sizeof(*p), 0);
 	if (nlh == NULL) {
 		kfree_skb(skb);
 		return -EMSGSIZE;
@@ -2497,7 +2497,7 @@ static int xfrm_notify_sa(struct xfrm_state *x, const struct km_event *c)
 	if (skb == NULL)
 		return -ENOMEM;
 
-	nlh = nlmsg_put(skb, c->pid, c->seq, c->event, headlen, 0);
+	nlh = nlmsg_put(skb, c->portid, c->seq, c->event, headlen, 0);
 	err = -EMSGSIZE;
 	if (nlh == NULL)
 		goto out_free_skb;
@@ -2696,7 +2696,7 @@ static int build_polexpire(struct sk_buff *skb, struct xfrm_policy *xp,
 	struct nlmsghdr *nlh;
 	int err;
 
-	nlh = nlmsg_put(skb, c->pid, 0, XFRM_MSG_POLEXPIRE, sizeof(*upe), 0);
+	nlh = nlmsg_put(skb, c->portid, 0, XFRM_MSG_POLEXPIRE, sizeof(*upe), 0);
 	if (nlh == NULL)
 		return -EMSGSIZE;
 
@@ -2756,7 +2756,7 @@ static int xfrm_notify_policy(struct xfrm_policy *xp, int dir, const struct km_e
 	if (skb == NULL)
 		return -ENOMEM;
 
-	nlh = nlmsg_put(skb, c->pid, c->seq, c->event, headlen, 0);
+	nlh = nlmsg_put(skb, c->portid, c->seq, c->event, headlen, 0);
 	err = -EMSGSIZE;
 	if (nlh == NULL)
 		goto out_free_skb;
@@ -2810,7 +2810,7 @@ static int xfrm_notify_policy_flush(const struct km_event *c)
 	if (skb == NULL)
 		return -ENOMEM;
 
-	nlh = nlmsg_put(skb, c->pid, c->seq, XFRM_MSG_FLUSHPOLICY, 0, 0);
+	nlh = nlmsg_put(skb, c->portid, c->seq, XFRM_MSG_FLUSHPOLICY, 0, 0);
 	err = -EMSGSIZE;
 	if (nlh == NULL)
 		goto out_free_skb;
-- 
cgit v1.2.3


From b6069a95706ca5738be3f5d90fd286cbd13ac695 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Fri, 7 Sep 2012 22:03:35 +0000
Subject: filter: add MOD operation

Add a new ALU opcode, to compute a modulus.

Commit ffe06c17afbbb used an ancillary to implement XOR_X,
but here we reserve one of the available ALU opcode to implement both
MOD_X and MOD_K

Signed-off-by: Eric Dumazet <edumazet@google.com>
Suggested-by: George Bakos <gbakos@alpinista.org>
Cc: Jay Schulist <jschlst@samba.org>
Cc: Jiri Pirko <jpirko@redhat.com>
Cc: Andi Kleen <ak@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/filter.h |  4 ++++
 net/core/filter.c      | 15 +++++++++++++++
 2 files changed, 19 insertions(+)

(limited to 'include')

diff --git a/include/linux/filter.h b/include/linux/filter.h
index 82b01357af8b..3cf5fd561d86 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -74,6 +74,8 @@ struct sock_fprog {	/* Required for SO_ATTACH_FILTER. */
 #define         BPF_LSH         0x60
 #define         BPF_RSH         0x70
 #define         BPF_NEG         0x80
+#define		BPF_MOD		0x90
+
 #define         BPF_JA          0x00
 #define         BPF_JEQ         0x10
 #define         BPF_JGT         0x20
@@ -196,6 +198,8 @@ enum {
 	BPF_S_ALU_MUL_K,
 	BPF_S_ALU_MUL_X,
 	BPF_S_ALU_DIV_X,
+	BPF_S_ALU_MOD_K,
+	BPF_S_ALU_MOD_X,
 	BPF_S_ALU_AND_K,
 	BPF_S_ALU_AND_X,
 	BPF_S_ALU_OR_K,
diff --git a/net/core/filter.c b/net/core/filter.c
index 907efd27ec77..fbe3a8d12570 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -167,6 +167,14 @@ unsigned int sk_run_filter(const struct sk_buff *skb,
 		case BPF_S_ALU_DIV_K:
 			A = reciprocal_divide(A, K);
 			continue;
+		case BPF_S_ALU_MOD_X:
+			if (X == 0)
+				return 0;
+			A %= X;
+			continue;
+		case BPF_S_ALU_MOD_K:
+			A %= K;
+			continue;
 		case BPF_S_ALU_AND_X:
 			A &= X;
 			continue;
@@ -469,6 +477,8 @@ int sk_chk_filter(struct sock_filter *filter, unsigned int flen)
 		[BPF_ALU|BPF_MUL|BPF_K]  = BPF_S_ALU_MUL_K,
 		[BPF_ALU|BPF_MUL|BPF_X]  = BPF_S_ALU_MUL_X,
 		[BPF_ALU|BPF_DIV|BPF_X]  = BPF_S_ALU_DIV_X,
+		[BPF_ALU|BPF_MOD|BPF_K]  = BPF_S_ALU_MOD_K,
+		[BPF_ALU|BPF_MOD|BPF_X]  = BPF_S_ALU_MOD_X,
 		[BPF_ALU|BPF_AND|BPF_K]  = BPF_S_ALU_AND_K,
 		[BPF_ALU|BPF_AND|BPF_X]  = BPF_S_ALU_AND_X,
 		[BPF_ALU|BPF_OR|BPF_K]   = BPF_S_ALU_OR_K,
@@ -531,6 +541,11 @@ int sk_chk_filter(struct sock_filter *filter, unsigned int flen)
 				return -EINVAL;
 			ftest->k = reciprocal_value(ftest->k);
 			break;
+		case BPF_S_ALU_MOD_K:
+			/* check for division by zero */
+			if (ftest->k == 0)
+				return -EINVAL;
+			break;
 		case BPF_S_LD_MEM:
 		case BPF_S_LDX_MEM:
 		case BPF_S_ST:
-- 
cgit v1.2.3


From 6d57e9078e880a3dd232d579f42ac437a8f1ef7b Mon Sep 17 00:00:00 2001
From: Duan Jiong <djduanjiong@gmail.com>
Date: Sat, 8 Sep 2012 16:32:28 +0000
Subject: etherdevice: introduce help function eth_zero_addr()

a lot of code has either the memset or an inefficient copy
from a static array that contains the all-zeros Ethernet address.
Introduce help function eth_zero_addr() to fill an address with
all zeros, making the code clearer and allowing us to get rid of
some constant arrays.

Signed-off-by: Duan Jiong <djduanjiong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/etherdevice.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'include')

diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h
index d426336d92d9..b006ba0a9f42 100644
--- a/include/linux/etherdevice.h
+++ b/include/linux/etherdevice.h
@@ -150,6 +150,17 @@ static inline void eth_broadcast_addr(u8 *addr)
 	memset(addr, 0xff, ETH_ALEN);
 }
 
+/**
+ * eth_zero_addr - Assign zero address
+ * @addr: Pointer to a six-byte array containing the Ethernet address
+ *
+ * Assign the zero address to the given address array.
+ */
+static inline void eth_zero_addr(u8 *addr)
+{
+	memset(addr, 0x00, ETH_ALEN);
+}
+
 /**
  * eth_hw_addr_random - Generate software assigned random Ethernet and
  * set device flag
-- 
cgit v1.2.3


From 8289bab1daf9768c20114051a99c1bd5f48d4420 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Fri, 7 Sep 2012 12:39:21 +0000
Subject: scsi_netlink: Remove dead and buggy code

The scsi netlink code confuses the netlink port id with a process id,
going so far as to read NETLINK_CREDS(skb)->pid instead of the correct
NETLINK_CB(skb).pid.  Fortunately it does not matter because nothing
registers to respond to scsi netlink requests.

The only interesting use of the scsi_netlink interface is
fc_host_post_vendor_event which sends a netlink multicast message.

Since nothing registers to handle scsi netlink messages kill all of the
registration logic, while retaining the same error handling behavior
preserving the userspace visible behavior and removing all of the
confused code that thought a netlink port id was a process id.

This was tested with a kernel allyesconfig build which had no problems.

Cc: James Bottomley <James.Bottomley@parallels.com>
Cc: James Smart <James.Smart@Emulex.Com>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/scsi/scsi_netlink.c | 555 ++------------------------------------------
 include/scsi/scsi_netlink.h |  24 --
 2 files changed, 15 insertions(+), 564 deletions(-)

(limited to 'include')

diff --git a/drivers/scsi/scsi_netlink.c b/drivers/scsi/scsi_netlink.c
index 3252bc9625ee..65123a21b97e 100644
--- a/drivers/scsi/scsi_netlink.c
+++ b/drivers/scsi/scsi_netlink.c
@@ -33,40 +33,6 @@
 struct sock *scsi_nl_sock = NULL;
 EXPORT_SYMBOL_GPL(scsi_nl_sock);
 
-static DEFINE_SPINLOCK(scsi_nl_lock);
-static struct list_head scsi_nl_drivers;
-
-static u32	scsi_nl_state;
-#define STATE_EHANDLER_BSY		0x00000001
-
-struct scsi_nl_transport {
-	int (*msg_handler)(struct sk_buff *);
-	void (*event_handler)(struct notifier_block *, unsigned long, void *);
-	unsigned int refcnt;
-	int flags;
-};
-
-/* flags values (bit flags) */
-#define HANDLER_DELETING		0x1
-
-static struct scsi_nl_transport transports[SCSI_NL_MAX_TRANSPORTS] =
-	{ {NULL, }, };
-
-
-struct scsi_nl_drvr {
-	struct list_head next;
-	int (*dmsg_handler)(struct Scsi_Host *shost, void *payload,
-				 u32 len, u32 pid);
-	void (*devt_handler)(struct notifier_block *nb,
-				 unsigned long event, void *notify_ptr);
-	struct scsi_host_template *hostt;
-	u64 vendor_id;
-	unsigned int refcnt;
-	int flags;
-};
-
-
-
 /**
  * scsi_nl_rcv_msg - Receive message handler.
  * @skb:		socket receive buffer
@@ -81,7 +47,6 @@ scsi_nl_rcv_msg(struct sk_buff *skb)
 {
 	struct nlmsghdr *nlh;
 	struct scsi_nl_hdr *hdr;
-	unsigned long flags;
 	u32 rlen;
 	int err, tport;
 
@@ -126,22 +91,24 @@ scsi_nl_rcv_msg(struct sk_buff *skb)
 		/*
 		 * Deliver message to the appropriate transport
 		 */
-		spin_lock_irqsave(&scsi_nl_lock, flags);
-
 		tport = hdr->transport;
-		if ((tport < SCSI_NL_MAX_TRANSPORTS) &&
-		    !(transports[tport].flags & HANDLER_DELETING) &&
-		    (transports[tport].msg_handler)) {
-			transports[tport].refcnt++;
-			spin_unlock_irqrestore(&scsi_nl_lock, flags);
-			err = transports[tport].msg_handler(skb);
-			spin_lock_irqsave(&scsi_nl_lock, flags);
-			transports[tport].refcnt--;
-		} else
+		if (tport == SCSI_NL_TRANSPORT) {
+			switch (hdr->msgtype) {
+			case SCSI_NL_SHOST_VENDOR:
+				/* Locate the driver that corresponds to the message */
+				err = -ESRCH;
+				break;
+			default:
+				err = -EBADR;
+				break;
+			}
+			if (err)
+				printk(KERN_WARNING "%s: Msgtype %d failed - err %d\n",
+				       __func__, hdr->msgtype, err);
+		}
+		else
 			err = -ENOENT;
 
-		spin_unlock_irqrestore(&scsi_nl_lock, flags);
-
 next_msg:
 		if ((err) || (nlh->nlmsg_flags & NLM_F_ACK))
 			netlink_ack(skb, nlh, err);
@@ -150,333 +117,6 @@ next_msg:
 	}
 }
 
-
-/**
- * scsi_nl_rcv_event - Event handler for a netlink socket.
- * @this:		event notifier block
- * @event:		event type
- * @ptr:		event payload
- *
- **/
-static int
-scsi_nl_rcv_event(struct notifier_block *this, unsigned long event, void *ptr)
-{
-	struct netlink_notify *n = ptr;
-	struct scsi_nl_drvr *driver;
-	unsigned long flags;
-	int tport;
-
-	if (n->protocol != NETLINK_SCSITRANSPORT)
-		return NOTIFY_DONE;
-
-	spin_lock_irqsave(&scsi_nl_lock, flags);
-	scsi_nl_state |= STATE_EHANDLER_BSY;
-
-	/*
-	 * Pass event on to any transports that may be listening
-	 */
-	for (tport = 0; tport < SCSI_NL_MAX_TRANSPORTS; tport++) {
-		if (!(transports[tport].flags & HANDLER_DELETING) &&
-		    (transports[tport].event_handler)) {
-			spin_unlock_irqrestore(&scsi_nl_lock, flags);
-			transports[tport].event_handler(this, event, ptr);
-			spin_lock_irqsave(&scsi_nl_lock, flags);
-		}
-	}
-
-	/*
-	 * Pass event on to any drivers that may be listening
-	 */
-	list_for_each_entry(driver, &scsi_nl_drivers, next) {
-		if (!(driver->flags & HANDLER_DELETING) &&
-		    (driver->devt_handler)) {
-			spin_unlock_irqrestore(&scsi_nl_lock, flags);
-			driver->devt_handler(this, event, ptr);
-			spin_lock_irqsave(&scsi_nl_lock, flags);
-		}
-	}
-
-	scsi_nl_state &= ~STATE_EHANDLER_BSY;
-	spin_unlock_irqrestore(&scsi_nl_lock, flags);
-
-	return NOTIFY_DONE;
-}
-
-static struct notifier_block scsi_netlink_notifier = {
-	.notifier_call  = scsi_nl_rcv_event,
-};
-
-
-/*
- * GENERIC SCSI transport receive and event handlers
- */
-
-/**
- * scsi_generic_msg_handler - receive message handler for GENERIC transport messages
- * @skb:		socket receive buffer
- **/
-static int
-scsi_generic_msg_handler(struct sk_buff *skb)
-{
-	struct nlmsghdr *nlh = nlmsg_hdr(skb);
-	struct scsi_nl_hdr *snlh = NLMSG_DATA(nlh);
-	struct scsi_nl_drvr *driver;
-	struct Scsi_Host *shost;
-	unsigned long flags;
-	int err = 0, match, pid;
-
-	pid = NETLINK_CREDS(skb)->pid;
-
-	switch (snlh->msgtype) {
-	case SCSI_NL_SHOST_VENDOR:
-		{
-		struct scsi_nl_host_vendor_msg *msg = NLMSG_DATA(nlh);
-
-		/* Locate the driver that corresponds to the message */
-		spin_lock_irqsave(&scsi_nl_lock, flags);
-		match = 0;
-		list_for_each_entry(driver, &scsi_nl_drivers, next) {
-			if (driver->vendor_id == msg->vendor_id) {
-				match = 1;
-				break;
-			}
-		}
-
-		if ((!match) || (!driver->dmsg_handler)) {
-			spin_unlock_irqrestore(&scsi_nl_lock, flags);
-			err = -ESRCH;
-			goto rcv_exit;
-		}
-
-		if (driver->flags & HANDLER_DELETING) {
-			spin_unlock_irqrestore(&scsi_nl_lock, flags);
-			err = -ESHUTDOWN;
-			goto rcv_exit;
-		}
-
-		driver->refcnt++;
-		spin_unlock_irqrestore(&scsi_nl_lock, flags);
-
-
-		/* if successful, scsi_host_lookup takes a shost reference */
-		shost = scsi_host_lookup(msg->host_no);
-		if (!shost) {
-			err = -ENODEV;
-			goto driver_exit;
-		}
-
-		/* is this host owned by the vendor ? */
-		if (shost->hostt != driver->hostt) {
-			err = -EINVAL;
-			goto vendormsg_put;
-		}
-
-		/* pass message on to the driver */
-		err = driver->dmsg_handler(shost, (void *)&msg[1],
-					 msg->vmsg_datalen, pid);
-
-vendormsg_put:
-		/* release reference by scsi_host_lookup */
-		scsi_host_put(shost);
-
-driver_exit:
-		/* release our own reference on the registration object */
-		spin_lock_irqsave(&scsi_nl_lock, flags);
-		driver->refcnt--;
-		spin_unlock_irqrestore(&scsi_nl_lock, flags);
-		break;
-		}
-
-	default:
-		err = -EBADR;
-		break;
-	}
-
-rcv_exit:
-	if (err)
-		printk(KERN_WARNING "%s: Msgtype %d failed - err %d\n",
-			 __func__, snlh->msgtype, err);
-	return err;
-}
-
-
-/**
- * scsi_nl_add_transport -
- *    Registers message and event handlers for a transport. Enables
- *    receipt of netlink messages and events to a transport.
- *
- * @tport:		transport registering handlers
- * @msg_handler:	receive message handler callback
- * @event_handler:	receive event handler callback
- **/
-int
-scsi_nl_add_transport(u8 tport,
-	int (*msg_handler)(struct sk_buff *),
-	void (*event_handler)(struct notifier_block *, unsigned long, void *))
-{
-	unsigned long flags;
-	int err = 0;
-
-	if (tport >= SCSI_NL_MAX_TRANSPORTS)
-		return -EINVAL;
-
-	spin_lock_irqsave(&scsi_nl_lock, flags);
-
-	if (scsi_nl_state & STATE_EHANDLER_BSY) {
-		spin_unlock_irqrestore(&scsi_nl_lock, flags);
-		msleep(1);
-		spin_lock_irqsave(&scsi_nl_lock, flags);
-	}
-
-	if (transports[tport].msg_handler || transports[tport].event_handler) {
-		err = -EALREADY;
-		goto register_out;
-	}
-
-	transports[tport].msg_handler = msg_handler;
-	transports[tport].event_handler = event_handler;
-	transports[tport].flags = 0;
-	transports[tport].refcnt = 0;
-
-register_out:
-	spin_unlock_irqrestore(&scsi_nl_lock, flags);
-
-	return err;
-}
-EXPORT_SYMBOL_GPL(scsi_nl_add_transport);
-
-
-/**
- * scsi_nl_remove_transport -
- *    Disable transport receiption of messages and events
- *
- * @tport:		transport deregistering handlers
- *
- **/
-void
-scsi_nl_remove_transport(u8 tport)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&scsi_nl_lock, flags);
-	if (scsi_nl_state & STATE_EHANDLER_BSY) {
-		spin_unlock_irqrestore(&scsi_nl_lock, flags);
-		msleep(1);
-		spin_lock_irqsave(&scsi_nl_lock, flags);
-	}
-
-	if (tport < SCSI_NL_MAX_TRANSPORTS) {
-		transports[tport].flags |= HANDLER_DELETING;
-
-		while (transports[tport].refcnt != 0) {
-			spin_unlock_irqrestore(&scsi_nl_lock, flags);
-			schedule_timeout_uninterruptible(HZ/4);
-			spin_lock_irqsave(&scsi_nl_lock, flags);
-		}
-		transports[tport].msg_handler = NULL;
-		transports[tport].event_handler = NULL;
-		transports[tport].flags = 0;
-	}
-
-	spin_unlock_irqrestore(&scsi_nl_lock, flags);
-
-	return;
-}
-EXPORT_SYMBOL_GPL(scsi_nl_remove_transport);
-
-
-/**
- * scsi_nl_add_driver -
- *    A driver is registering its interfaces for SCSI netlink messages
- *
- * @vendor_id:          A unique identification value for the driver.
- * @hostt:		address of the driver's host template. Used
- *			to verify an shost is bound to the driver
- * @nlmsg_handler:	receive message handler callback
- * @nlevt_handler:	receive event handler callback
- *
- * Returns:
- *   0 on Success
- *   error result otherwise
- **/
-int
-scsi_nl_add_driver(u64 vendor_id, struct scsi_host_template *hostt,
-	int (*nlmsg_handler)(struct Scsi_Host *shost, void *payload,
-				 u32 len, u32 pid),
-	void (*nlevt_handler)(struct notifier_block *nb,
-				 unsigned long event, void *notify_ptr))
-{
-	struct scsi_nl_drvr *driver;
-	unsigned long flags;
-
-	driver = kzalloc(sizeof(*driver), GFP_KERNEL);
-	if (unlikely(!driver)) {
-		printk(KERN_ERR "%s: allocation failure\n", __func__);
-		return -ENOMEM;
-	}
-
-	driver->dmsg_handler = nlmsg_handler;
-	driver->devt_handler = nlevt_handler;
-	driver->hostt = hostt;
-	driver->vendor_id = vendor_id;
-
-	spin_lock_irqsave(&scsi_nl_lock, flags);
-	if (scsi_nl_state & STATE_EHANDLER_BSY) {
-		spin_unlock_irqrestore(&scsi_nl_lock, flags);
-		msleep(1);
-		spin_lock_irqsave(&scsi_nl_lock, flags);
-	}
-	list_add_tail(&driver->next, &scsi_nl_drivers);
-	spin_unlock_irqrestore(&scsi_nl_lock, flags);
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(scsi_nl_add_driver);
-
-
-/**
- * scsi_nl_remove_driver -
- *    An driver is unregistering with the SCSI netlink messages
- *
- * @vendor_id:          The unique identification value for the driver.
- **/
-void
-scsi_nl_remove_driver(u64 vendor_id)
-{
-	struct scsi_nl_drvr *driver;
-	unsigned long flags;
-
-	spin_lock_irqsave(&scsi_nl_lock, flags);
-	if (scsi_nl_state & STATE_EHANDLER_BSY) {
-		spin_unlock_irqrestore(&scsi_nl_lock, flags);
-		msleep(1);
-		spin_lock_irqsave(&scsi_nl_lock, flags);
-	}
-
-	list_for_each_entry(driver, &scsi_nl_drivers, next) {
-		if (driver->vendor_id == vendor_id) {
-			driver->flags |= HANDLER_DELETING;
-			while (driver->refcnt != 0) {
-				spin_unlock_irqrestore(&scsi_nl_lock, flags);
-				schedule_timeout_uninterruptible(HZ/4);
-				spin_lock_irqsave(&scsi_nl_lock, flags);
-			}
-			list_del(&driver->next);
-			kfree(driver);
-			spin_unlock_irqrestore(&scsi_nl_lock, flags);
-			return;
-		}
-	}
-
-	spin_unlock_irqrestore(&scsi_nl_lock, flags);
-
-	printk(KERN_ERR "%s: removal of driver failed - vendor_id 0x%llx\n",
-	       __func__, (unsigned long long)vendor_id);
-	return;
-}
-EXPORT_SYMBOL_GPL(scsi_nl_remove_driver);
-
-
 /**
  * scsi_netlink_init - Called by SCSI subsystem to initialize
  * 	the SCSI transport netlink interface
@@ -485,36 +125,19 @@ EXPORT_SYMBOL_GPL(scsi_nl_remove_driver);
 void
 scsi_netlink_init(void)
 {
-	int error;
 	struct netlink_kernel_cfg cfg = {
 		.input	= scsi_nl_rcv_msg,
 		.groups	= SCSI_NL_GRP_CNT,
 	};
 
-	INIT_LIST_HEAD(&scsi_nl_drivers);
-
-	error = netlink_register_notifier(&scsi_netlink_notifier);
-	if (error) {
-		printk(KERN_ERR "%s: register of event handler failed - %d\n",
-				__func__, error);
-		return;
-	}
-
 	scsi_nl_sock = netlink_kernel_create(&init_net, NETLINK_SCSITRANSPORT,
 					     &cfg);
 	if (!scsi_nl_sock) {
 		printk(KERN_ERR "%s: register of receive handler failed\n",
 				__func__);
-		netlink_unregister_notifier(&scsi_netlink_notifier);
 		return;
 	}
 
-	/* Register the entry points for the generic SCSI transport */
-	error = scsi_nl_add_transport(SCSI_NL_TRANSPORT,
-				scsi_generic_msg_handler, NULL);
-	if (error)
-		printk(KERN_ERR "%s: register of GENERIC transport handler"
-				"  failed - %d\n", __func__, error);
 	return;
 }
 
@@ -526,158 +149,10 @@ scsi_netlink_init(void)
 void
 scsi_netlink_exit(void)
 {
-	scsi_nl_remove_transport(SCSI_NL_TRANSPORT);
-
 	if (scsi_nl_sock) {
 		netlink_kernel_release(scsi_nl_sock);
-		netlink_unregister_notifier(&scsi_netlink_notifier);
 	}
 
 	return;
 }
 
-
-/*
- * Exported Interfaces
- */
-
-/**
- * scsi_nl_send_transport_msg -
- *    Generic function to send a single message from a SCSI transport to
- *    a single process
- *
- * @pid:		receiving pid
- * @hdr:		message payload
- *
- **/
-void
-scsi_nl_send_transport_msg(u32 pid, struct scsi_nl_hdr *hdr)
-{
-	struct sk_buff *skb;
-	struct nlmsghdr	*nlh;
-	const char *fn;
-	char *datab;
-	u32 len, skblen;
-	int err;
-
-	if (!scsi_nl_sock) {
-		err = -ENOENT;
-		fn = "netlink socket";
-		goto msg_fail;
-	}
-
-	len = NLMSG_SPACE(hdr->msglen);
-	skblen = NLMSG_SPACE(len);
-
-	skb = alloc_skb(skblen, GFP_KERNEL);
-	if (!skb) {
-		err = -ENOBUFS;
-		fn = "alloc_skb";
-		goto msg_fail;
-	}
-
-	nlh = nlmsg_put(skb, pid, 0, SCSI_TRANSPORT_MSG, len - sizeof(*nlh), 0);
-	if (!nlh) {
-		err = -ENOBUFS;
-		fn = "nlmsg_put";
-		goto msg_fail_skb;
-	}
-	datab = NLMSG_DATA(nlh);
-	memcpy(datab, hdr, hdr->msglen);
-
-	err = nlmsg_unicast(scsi_nl_sock, skb, pid);
-	if (err < 0) {
-		fn = "nlmsg_unicast";
-		/* nlmsg_unicast already kfree_skb'd */
-		goto msg_fail;
-	}
-
-	return;
-
-msg_fail_skb:
-	kfree_skb(skb);
-msg_fail:
-	printk(KERN_WARNING
-		"%s: Dropped Message : pid %d Transport %d, msgtype x%x, "
-		"msglen %d: %s : err %d\n",
-		__func__, pid, hdr->transport, hdr->msgtype, hdr->msglen,
-		fn, err);
-	return;
-}
-EXPORT_SYMBOL_GPL(scsi_nl_send_transport_msg);
-
-
-/**
- * scsi_nl_send_vendor_msg - called to send a shost vendor unique message
- *                      to a specific process id.
- *
- * @pid:		process id of the receiver
- * @host_no:		host # sending the message
- * @vendor_id:		unique identifier for the driver's vendor
- * @data_len:		amount, in bytes, of vendor unique payload data
- * @data_buf:		pointer to vendor unique data buffer
- *
- * Returns:
- *   0 on successful return
- *   otherwise, failing error code
- *
- * Notes:
- *	This routine assumes no locks are held on entry.
- */
-int
-scsi_nl_send_vendor_msg(u32 pid, unsigned short host_no, u64 vendor_id,
-			 char *data_buf, u32 data_len)
-{
-	struct sk_buff *skb;
-	struct nlmsghdr	*nlh;
-	struct scsi_nl_host_vendor_msg *msg;
-	u32 len, skblen;
-	int err;
-
-	if (!scsi_nl_sock) {
-		err = -ENOENT;
-		goto send_vendor_fail;
-	}
-
-	len = SCSI_NL_MSGALIGN(sizeof(*msg) + data_len);
-	skblen = NLMSG_SPACE(len);
-
-	skb = alloc_skb(skblen, GFP_KERNEL);
-	if (!skb) {
-		err = -ENOBUFS;
-		goto send_vendor_fail;
-	}
-
-	nlh = nlmsg_put(skb, 0, 0, SCSI_TRANSPORT_MSG,
-				skblen - sizeof(*nlh), 0);
-	if (!nlh) {
-		err = -ENOBUFS;
-		goto send_vendor_fail_skb;
-	}
-	msg = NLMSG_DATA(nlh);
-
-	INIT_SCSI_NL_HDR(&msg->snlh, SCSI_NL_TRANSPORT,
-				SCSI_NL_SHOST_VENDOR, len);
-	msg->vendor_id = vendor_id;
-	msg->host_no = host_no;
-	msg->vmsg_datalen = data_len;	/* bytes */
-	memcpy(&msg[1], data_buf, data_len);
-
-	err = nlmsg_unicast(scsi_nl_sock, skb, pid);
-	if (err)
-		/* nlmsg_multicast already kfree_skb'd */
-		goto send_vendor_fail;
-
-	return 0;
-
-send_vendor_fail_skb:
-	kfree_skb(skb);
-send_vendor_fail:
-	printk(KERN_WARNING
-		"%s: Dropped SCSI Msg : host %d vendor_unique - err %d\n",
-		__func__, host_no, err);
-	return err;
-}
-EXPORT_SYMBOL(scsi_nl_send_vendor_msg);
-
-
diff --git a/include/scsi/scsi_netlink.h b/include/scsi/scsi_netlink.h
index 5cb20ccb1956..62b4edab15d3 100644
--- a/include/scsi/scsi_netlink.h
+++ b/include/scsi/scsi_netlink.h
@@ -119,29 +119,5 @@ struct scsi_nl_host_vendor_msg {
 	(hdr)->msglen = mlen;					\
 	}
 
-
-#ifdef __KERNEL__
-
-#include <scsi/scsi_host.h>
-
-/* Exported Kernel Interfaces */
-int scsi_nl_add_transport(u8 tport,
-	 int (*msg_handler)(struct sk_buff *),
-	void (*event_handler)(struct notifier_block *, unsigned long, void *));
-void scsi_nl_remove_transport(u8 tport);
-
-int scsi_nl_add_driver(u64 vendor_id, struct scsi_host_template *hostt,
-	int (*nlmsg_handler)(struct Scsi_Host *shost, void *payload,
-				 u32 len, u32 pid),
-	void (*nlevt_handler)(struct notifier_block *nb,
-				 unsigned long event, void *notify_ptr));
-void scsi_nl_remove_driver(u64 vendor_id);
-
-void scsi_nl_send_transport_msg(u32 pid, struct scsi_nl_hdr *hdr);
-int scsi_nl_send_vendor_msg(u32 pid, unsigned short host_no, u64 vendor_id,
-			 char *data_buf, u32 data_len);
-
-#endif /* __KERNEL__ */
-
 #endif /* SCSI_NETLINK_H */
 
-- 
cgit v1.2.3


From b4516a288e71c64d7e214902250baf78b7b3cdcf Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Mon, 17 Sep 2012 13:13:24 -0400
Subject: llc: Remove stray reference to sysctl_llc_station_ack_timeout.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/llc.h        | 1 -
 net/llc/sysctl_net_llc.c | 7 -------
 2 files changed, 8 deletions(-)

(limited to 'include')

diff --git a/include/net/llc.h b/include/net/llc.h
index f2d0fc570527..9e7d7f08ef77 100644
--- a/include/net/llc.h
+++ b/include/net/llc.h
@@ -151,7 +151,6 @@ extern int sysctl_llc2_ack_timeout;
 extern int sysctl_llc2_busy_timeout;
 extern int sysctl_llc2_p_timeout;
 extern int sysctl_llc2_rej_timeout;
-extern int sysctl_llc_station_ack_timeout;
 #else
 #define llc_sysctl_init() (0)
 #define llc_sysctl_exit() do { } while(0)
diff --git a/net/llc/sysctl_net_llc.c b/net/llc/sysctl_net_llc.c
index d75306b9c2f3..612a5ddaf93b 100644
--- a/net/llc/sysctl_net_llc.c
+++ b/net/llc/sysctl_net_llc.c
@@ -47,13 +47,6 @@ static struct ctl_table llc2_timeout_table[] = {
 };
 
 static struct ctl_table llc_station_table[] = {
-	{
-		.procname	= "ack_timeout",
-		.data		= &sysctl_llc_station_ack_timeout,
-		.maxlen		= sizeof(long),
-		.mode		= 0644,
-		.proc_handler   = proc_dointvec_jiffies,
-	},
 	{ },
 };
 
-- 
cgit v1.2.3


From 30d08a46ea2a4e44bc1a1f15f243af29c9150282 Mon Sep 17 00:00:00 2001
From: Arend van Spriel <arend@broadcom.com>
Date: Tue, 18 Sep 2012 10:59:59 +0200
Subject: cfg80211: remove obsolete comment for .sched_scan_stop() callback

The kerneldoc comment for .sched_scan_stop() callback describes a
driver_initiated flag, but the interface does not hold such a flag.

Reviewed-by: Franky (Zhenhui) Lin <frankyl@broadcom.com>
Reviewed-by: Hante Meuleman <meuleman@broadcom.com>
Signed-off-by: Arend van Spriel <arend@broadcom.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 903683b1e5b8..1a3fe9ae4c49 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -1580,9 +1580,7 @@ struct cfg80211_gtk_rekey_data {
  * @set_cqm_txe_config: Configure connection quality monitor TX error
  *	thresholds.
  * @sched_scan_start: Tell the driver to start a scheduled scan.
- * @sched_scan_stop: Tell the driver to stop an ongoing scheduled
- *	scan.  The driver_initiated flag specifies whether the driver
- *	itself has informed that the scan has stopped.
+ * @sched_scan_stop: Tell the driver to stop an ongoing scheduled scan.
  *
  * @mgmt_frame_register: Notify driver that a management frame type was
  *	registered. Note that this callback may not sleep, and cannot run
-- 
cgit v1.2.3


From ed44a951c72ab409f932b1c15914488308e86da2 Mon Sep 17 00:00:00 2001
From: Pandiyarajan Pitchaimuthu <c_ppitch@qca.qualcomm.com>
Date: Tue, 18 Sep 2012 16:50:49 +0530
Subject: cfg80211/nl80211: Notify connection request failure in AP mode

In AP mode, when a station requests connection to an AP and if the
request is failed for particular reason, userspace is notified about the
failure through NL80211_CMD_CONN_FAILED command. Reason for the failure
is sent through the attribute NL80211_ATTR_CONN_FAILED_REASON.

Signed-off-by: Pandiyarajan Pitchaimuthu <c_ppitch@qca.qualcomm.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/linux/nl80211.h | 24 ++++++++++++++++++++++++
 include/net/cfg80211.h  | 19 +++++++++++++++++++
 net/wireless/mlme.c     | 11 +++++++++++
 net/wireless/nl80211.c  | 34 ++++++++++++++++++++++++++++++++++
 net/wireless/nl80211.h  |  5 +++++
 5 files changed, 93 insertions(+)

(limited to 'include')

diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
index 458416279347..7df9b500c804 100644
--- a/include/linux/nl80211.h
+++ b/include/linux/nl80211.h
@@ -573,6 +573,11 @@
  * @NL80211_CMD_STOP_P2P_DEVICE: Stop the given P2P Device, identified by
  *	its %NL80211_ATTR_WDEV identifier.
  *
+ * @NL80211_CMD_CONN_FAILED: connection request to an AP failed; used to
+ *	notify userspace that AP has rejected the connection request from a
+ *	station, due to particular reason. %NL80211_ATTR_CONN_FAILED_REASON
+ *	is used for this.
+ *
  * @NL80211_CMD_MAX: highest used command number
  * @__NL80211_CMD_AFTER_LAST: internal use
  */
@@ -719,6 +724,8 @@ enum nl80211_commands {
 	NL80211_CMD_START_P2P_DEVICE,
 	NL80211_CMD_STOP_P2P_DEVICE,
 
+	NL80211_CMD_CONN_FAILED,
+
 	/* add new commands above here */
 
 	/* used to define NL80211_CMD_MAX below */
@@ -1262,6 +1269,10 @@ enum nl80211_commands {
  *	was used to provide the hint. For the different types of
  *	allowed user regulatory hints see nl80211_user_reg_hint_type.
  *
+ * @NL80211_ATTR_CONN_FAILED_REASON: The reason for which AP has rejected
+ *	the connection request from a station. nl80211_connect_failed_reason
+ *	enum has different reasons of connection failure.
+ *
  * @NL80211_ATTR_MAX: highest attribute number currently defined
  * @__NL80211_ATTR_AFTER_LAST: internal use
  */
@@ -1517,6 +1528,8 @@ enum nl80211_attrs {
 
 	NL80211_ATTR_USER_REG_HINT_TYPE,
 
+	NL80211_ATTR_CONN_FAILED_REASON,
+
 	/* add attributes here, update the policy in nl80211.c */
 
 	__NL80211_ATTR_AFTER_LAST,
@@ -3045,4 +3058,15 @@ enum nl80211_probe_resp_offload_support_attr {
 	NL80211_PROBE_RESP_OFFLOAD_SUPPORT_80211U =	1<<3,
 };
 
+/**
+ * enum nl80211_connect_failed_reason - connection request failed reasons
+ * @NL80211_CONN_FAIL_MAX_CLIENTS: Maximum number of clients that can be
+ *	handled by the AP is reached.
+ * @NL80211_CONN_FAIL_BLOCKED_CLIENT: Client's MAC is in the AP's blocklist.
+ */
+enum nl80211_connect_failed_reason {
+	NL80211_CONN_FAIL_MAX_CLIENTS,
+	NL80211_CONN_FAIL_BLOCKED_CLIENT,
+};
+
 #endif /* __LINUX_NL80211_H */
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 1a3fe9ae4c49..80051ffc2755 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -3360,6 +3360,25 @@ void cfg80211_new_sta(struct net_device *dev, const u8 *mac_addr,
  */
 void cfg80211_del_sta(struct net_device *dev, const u8 *mac_addr, gfp_t gfp);
 
+/**
+ * cfg80211_conn_failed - connection request failed notification
+ *
+ * @dev: the netdev
+ * @mac_addr: the station's address
+ * @reason: the reason for connection failure
+ * @gfp: allocation flags
+ *
+ * Whenever a station tries to connect to an AP and if the station
+ * could not connect to the AP as the AP has rejected the connection
+ * for some reasons, this function is called.
+ *
+ * The reason for connection failure can be any of the value from
+ * nl80211_connect_failed_reason enum
+ */
+void cfg80211_conn_failed(struct net_device *dev, const u8 *mac_addr,
+			  enum nl80211_connect_failed_reason reason,
+			  gfp_t gfp);
+
 /**
  * cfg80211_rx_mgmt - notification of received, unprocessed management frame
  * @wdev: wireless device receiving the frame
diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c
index 8fd0242ee169..3df195a3e336 100644
--- a/net/wireless/mlme.c
+++ b/net/wireless/mlme.c
@@ -612,6 +612,17 @@ void cfg80211_del_sta(struct net_device *dev, const u8 *mac_addr, gfp_t gfp)
 }
 EXPORT_SYMBOL(cfg80211_del_sta);
 
+void cfg80211_conn_failed(struct net_device *dev, const u8 *mac_addr,
+			  enum nl80211_connect_failed_reason reason,
+			  gfp_t gfp)
+{
+	struct wiphy *wiphy = dev->ieee80211_ptr->wiphy;
+	struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
+
+	nl80211_send_conn_failed_event(rdev, dev, mac_addr, reason, gfp);
+}
+EXPORT_SYMBOL(cfg80211_conn_failed);
+
 struct cfg80211_mgmt_registration {
 	struct list_head list;
 
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 222189b6ed53..f1047aea868a 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -8364,6 +8364,40 @@ void nl80211_send_sta_del_event(struct cfg80211_registered_device *rdev,
 	nlmsg_free(msg);
 }
 
+void nl80211_send_conn_failed_event(struct cfg80211_registered_device *rdev,
+				    struct net_device *dev, const u8 *mac_addr,
+				    enum nl80211_connect_failed_reason reason,
+				    gfp_t gfp)
+{
+	struct sk_buff *msg;
+	void *hdr;
+
+	msg = nlmsg_new(NLMSG_GOODSIZE, gfp);
+	if (!msg)
+		return;
+
+	hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_CONN_FAILED);
+	if (!hdr) {
+		nlmsg_free(msg);
+		return;
+	}
+
+	if (nla_put_u32(msg, NL80211_ATTR_IFINDEX, dev->ifindex) ||
+	    nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, mac_addr) ||
+	    nla_put_u32(msg, NL80211_ATTR_CONN_FAILED_REASON, reason))
+		goto nla_put_failure;
+
+	genlmsg_end(msg, hdr);
+
+	genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0,
+				nl80211_mlme_mcgrp.id, gfp);
+	return;
+
+ nla_put_failure:
+	genlmsg_cancel(msg, hdr);
+	nlmsg_free(msg);
+}
+
 static bool __nl80211_unexpected_frame(struct net_device *dev, u8 cmd,
 				       const u8 *addr, gfp_t gfp)
 {
diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h
index 9f2616fffb40..f6153516068c 100644
--- a/net/wireless/nl80211.h
+++ b/net/wireless/nl80211.h
@@ -91,6 +91,11 @@ void nl80211_send_sta_del_event(struct cfg80211_registered_device *rdev,
 				struct net_device *dev, const u8 *mac_addr,
 				gfp_t gfp);
 
+void nl80211_send_conn_failed_event(struct cfg80211_registered_device *rdev,
+				    struct net_device *dev, const u8 *mac_addr,
+				    enum nl80211_connect_failed_reason reason,
+				    gfp_t gfp);
+
 int nl80211_send_mgmt(struct cfg80211_registered_device *rdev,
 		      struct wireless_dev *wdev, u32 nlpid,
 		      int freq, int sig_dbm,
-- 
cgit v1.2.3


From 92a25256f142d55e25f9959441cea6ddeabae57e Mon Sep 17 00:00:00 2001
From: Johan Hedberg <johan.hedberg@intel.com>
Date: Thu, 6 Sep 2012 18:39:26 +0300
Subject: Bluetooth: mgmt: Implement support for passkey notification

This patch adds support for Secure Simple Pairing with devices that have
KeyboardOnly as their IO capability. Such devices will cause a passkey
notification on our side and optionally also keypress notifications.
Without this patch some keyboards cannot be paired using the mgmt
interface.

Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
Cc: stable@vger.kernel.org
Acked-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Gustavo Padovan <gustavo.padovan@collabora.co.uk>
---
 include/net/bluetooth/hci.h      | 18 +++++++++++
 include/net/bluetooth/hci_core.h |  5 +++
 include/net/bluetooth/mgmt.h     |  7 +++++
 net/bluetooth/hci_event.c        | 67 ++++++++++++++++++++++++++++++++++++++++
 net/bluetooth/mgmt.c             | 17 ++++++++++
 5 files changed, 114 insertions(+)

(limited to 'include')

diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index 0f28f7052f82..76b2b6bdcf36 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -1249,6 +1249,24 @@ struct hci_ev_simple_pair_complete {
 	bdaddr_t bdaddr;
 } __packed;
 
+#define HCI_EV_USER_PASSKEY_NOTIFY	0x3b
+struct hci_ev_user_passkey_notify {
+	bdaddr_t	bdaddr;
+	__le32		passkey;
+} __packed;
+
+#define HCI_KEYPRESS_STARTED		0
+#define HCI_KEYPRESS_ENTERED		1
+#define HCI_KEYPRESS_ERASED		2
+#define HCI_KEYPRESS_CLEARED		3
+#define HCI_KEYPRESS_COMPLETED		4
+
+#define HCI_EV_KEYPRESS_NOTIFY		0x3c
+struct hci_ev_keypress_notify {
+	bdaddr_t	bdaddr;
+	__u8		type;
+} __packed;
+
 #define HCI_EV_REMOTE_HOST_FEATURES	0x3d
 struct hci_ev_remote_host_features {
 	bdaddr_t bdaddr;
diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 6a3337e9c42c..e7d454609881 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -303,6 +303,8 @@ struct hci_conn {
 	__u8		pin_length;
 	__u8		enc_key_size;
 	__u8		io_capability;
+	__u32		passkey_notify;
+	__u8		passkey_entered;
 	__u16		disc_timeout;
 	unsigned long	flags;
 
@@ -1022,6 +1024,9 @@ int mgmt_user_passkey_reply_complete(struct hci_dev *hdev, bdaddr_t *bdaddr,
 				     u8 link_type, u8 addr_type, u8 status);
 int mgmt_user_passkey_neg_reply_complete(struct hci_dev *hdev, bdaddr_t *bdaddr,
 					 u8 link_type, u8 addr_type, u8 status);
+int mgmt_user_passkey_notify(struct hci_dev *hdev, bdaddr_t *bdaddr,
+			     u8 link_type, u8 addr_type, u32 passkey,
+			     u8 entered);
 int mgmt_auth_failed(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type,
 		     u8 addr_type, u8 status);
 int mgmt_auth_enable_complete(struct hci_dev *hdev, u8 status);
diff --git a/include/net/bluetooth/mgmt.h b/include/net/bluetooth/mgmt.h
index 1b48effcd973..22980a7c3873 100644
--- a/include/net/bluetooth/mgmt.h
+++ b/include/net/bluetooth/mgmt.h
@@ -478,3 +478,10 @@ struct mgmt_ev_device_unblocked {
 struct mgmt_ev_device_unpaired {
 	struct mgmt_addr_info addr;
 } __packed;
+
+#define MGMT_EV_PASSKEY_NOTIFY		0x0017
+struct mgmt_ev_passkey_notify {
+	struct mgmt_addr_info addr;
+	__le32	passkey;
+	__u8	entered;
+} __packed;
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 48d730228c2f..ccca88fc6195 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -3263,6 +3263,65 @@ static void hci_user_passkey_request_evt(struct hci_dev *hdev,
 		mgmt_user_passkey_request(hdev, &ev->bdaddr, ACL_LINK, 0);
 }
 
+static void hci_user_passkey_notify_evt(struct hci_dev *hdev,
+					struct sk_buff *skb)
+{
+	struct hci_ev_user_passkey_notify *ev = (void *) skb->data;
+	struct hci_conn *conn;
+
+	BT_DBG("%s", hdev->name);
+
+	conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &ev->bdaddr);
+	if (!conn)
+		return;
+
+	conn->passkey_notify = __le32_to_cpu(ev->passkey);
+	conn->passkey_entered = 0;
+
+	if (test_bit(HCI_MGMT, &hdev->dev_flags))
+		mgmt_user_passkey_notify(hdev, &conn->dst, conn->type,
+					 conn->dst_type, conn->passkey_notify,
+					 conn->passkey_entered);
+}
+
+static void hci_keypress_notify_evt(struct hci_dev *hdev, struct sk_buff *skb)
+{
+	struct hci_ev_keypress_notify *ev = (void *) skb->data;
+	struct hci_conn *conn;
+
+	BT_DBG("%s", hdev->name);
+
+	conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &ev->bdaddr);
+	if (!conn)
+		return;
+
+	switch (ev->type) {
+	case HCI_KEYPRESS_STARTED:
+		conn->passkey_entered = 0;
+		return;
+
+	case HCI_KEYPRESS_ENTERED:
+		conn->passkey_entered++;
+		break;
+
+	case HCI_KEYPRESS_ERASED:
+		conn->passkey_entered--;
+		break;
+
+	case HCI_KEYPRESS_CLEARED:
+		conn->passkey_entered = 0;
+		break;
+
+	case HCI_KEYPRESS_COMPLETED:
+		return;
+	}
+
+	if (test_bit(HCI_MGMT, &hdev->dev_flags))
+		mgmt_user_passkey_notify(hdev, &conn->dst, conn->type,
+					 conn->dst_type, conn->passkey_notify,
+					 conn->passkey_entered);
+}
+
 static void hci_simple_pair_complete_evt(struct hci_dev *hdev,
 					 struct sk_buff *skb)
 {
@@ -3627,6 +3686,14 @@ void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb)
 		hci_user_passkey_request_evt(hdev, skb);
 		break;
 
+	case HCI_EV_USER_PASSKEY_NOTIFY:
+		hci_user_passkey_notify_evt(hdev, skb);
+		break;
+
+	case HCI_EV_KEYPRESS_NOTIFY:
+		hci_keypress_notify_evt(hdev, skb);
+		break;
+
 	case HCI_EV_SIMPLE_PAIR_COMPLETE:
 		hci_simple_pair_complete_evt(hdev, skb);
 		break;
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index 05d4b83a0189..8e1ab59a9cef 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -99,6 +99,7 @@ static const u16 mgmt_events[] = {
 	MGMT_EV_DEVICE_BLOCKED,
 	MGMT_EV_DEVICE_UNBLOCKED,
 	MGMT_EV_DEVICE_UNPAIRED,
+	MGMT_EV_PASSKEY_NOTIFY,
 };
 
 /*
@@ -3276,6 +3277,22 @@ int mgmt_user_passkey_neg_reply_complete(struct hci_dev *hdev, bdaddr_t *bdaddr,
 					  MGMT_OP_USER_PASSKEY_NEG_REPLY);
 }
 
+int mgmt_user_passkey_notify(struct hci_dev *hdev, bdaddr_t *bdaddr,
+			     u8 link_type, u8 addr_type, u32 passkey,
+			     u8 entered)
+{
+	struct mgmt_ev_passkey_notify ev;
+
+	BT_DBG("%s", hdev->name);
+
+	bacpy(&ev.addr.bdaddr, bdaddr);
+	ev.addr.type = link_to_bdaddr(link_type, addr_type);
+	ev.passkey = __cpu_to_le32(passkey);
+	ev.entered = entered;
+
+	return mgmt_event(MGMT_EV_PASSKEY_NOTIFY, hdev, &ev, sizeof(ev), NULL);
+}
+
 int mgmt_auth_failed(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type,
 		     u8 addr_type, u8 status)
 {
-- 
cgit v1.2.3


From 552bff0c2fec8953ba3793d75ee335032cc0b47c Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Wed, 19 Sep 2012 09:26:06 +0200
Subject: cfg80211: constify name parameter to add_virtual_intf

The name can't be modified by the driver,
make it const.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 drivers/net/wireless/ath/ath6kl/cfg80211.c | 4 ++--
 drivers/net/wireless/ath/ath6kl/cfg80211.h | 2 +-
 drivers/net/wireless/mwifiex/cfg80211.c    | 2 +-
 drivers/net/wireless/mwifiex/main.h        | 2 +-
 include/net/cfg80211.h                     | 2 +-
 net/mac80211/cfg.c                         | 3 ++-
 6 files changed, 8 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/drivers/net/wireless/ath/ath6kl/cfg80211.c b/drivers/net/wireless/ath/ath6kl/cfg80211.c
index 86aeef4b9d7e..7089f8160ad5 100644
--- a/drivers/net/wireless/ath/ath6kl/cfg80211.c
+++ b/drivers/net/wireless/ath/ath6kl/cfg80211.c
@@ -1488,7 +1488,7 @@ static int ath6kl_cfg80211_set_power_mgmt(struct wiphy *wiphy,
 }
 
 static struct wireless_dev *ath6kl_cfg80211_add_iface(struct wiphy *wiphy,
-						      char *name,
+						      const char *name,
 						      enum nl80211_iftype type,
 						      u32 *flags,
 						      struct vif_params *params)
@@ -3477,7 +3477,7 @@ void ath6kl_cfg80211_vif_cleanup(struct ath6kl_vif *vif)
 	ar->num_vif--;
 }
 
-struct wireless_dev *ath6kl_interface_add(struct ath6kl *ar, char *name,
+struct wireless_dev *ath6kl_interface_add(struct ath6kl *ar, const char *name,
 					  enum nl80211_iftype type,
 					  u8 fw_vif_idx, u8 nw_type)
 {
diff --git a/drivers/net/wireless/ath/ath6kl/cfg80211.h b/drivers/net/wireless/ath/ath6kl/cfg80211.h
index 56b1ebe79812..780f77775a91 100644
--- a/drivers/net/wireless/ath/ath6kl/cfg80211.h
+++ b/drivers/net/wireless/ath/ath6kl/cfg80211.h
@@ -25,7 +25,7 @@ enum ath6kl_cfg_suspend_mode {
 	ATH6KL_CFG_SUSPEND_SCHED_SCAN,
 };
 
-struct wireless_dev *ath6kl_interface_add(struct ath6kl *ar, char *name,
+struct wireless_dev *ath6kl_interface_add(struct ath6kl *ar, const char *name,
 					  enum nl80211_iftype type,
 					  u8 fw_vif_idx, u8 nw_type);
 void ath6kl_cfg80211_ch_switch_notify(struct ath6kl_vif *vif, int freq,
diff --git a/drivers/net/wireless/mwifiex/cfg80211.c b/drivers/net/wireless/mwifiex/cfg80211.c
index e57f543413de..49e20661b60b 100644
--- a/drivers/net/wireless/mwifiex/cfg80211.c
+++ b/drivers/net/wireless/mwifiex/cfg80211.c
@@ -1597,7 +1597,7 @@ mwifiex_setup_ht_caps(struct ieee80211_sta_ht_cap *ht_info,
  *  create a new virtual interface with the given name
  */
 struct wireless_dev *mwifiex_add_virtual_intf(struct wiphy *wiphy,
-					      char *name,
+					      const char *name,
 					      enum nl80211_iftype type,
 					      u32 *flags,
 					      struct vif_params *params)
diff --git a/drivers/net/wireless/mwifiex/main.h b/drivers/net/wireless/mwifiex/main.h
index 994bc4fc263e..d2fb3c3e1db1 100644
--- a/drivers/net/wireless/mwifiex/main.h
+++ b/drivers/net/wireless/mwifiex/main.h
@@ -1030,7 +1030,7 @@ int mwifiex_check_network_compatibility(struct mwifiex_private *priv,
 					struct mwifiex_bssdescriptor *bss_desc);
 
 struct wireless_dev *mwifiex_add_virtual_intf(struct wiphy *wiphy,
-					      char *name,
+					      const char *name,
 					      enum nl80211_iftype type,
 					      u32 *flags,
 					      struct vif_params *params);
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 80051ffc2755..ab78b53bd6f5 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -1628,7 +1628,7 @@ struct cfg80211_ops {
 	void	(*set_wakeup)(struct wiphy *wiphy, bool enabled);
 
 	struct wireless_dev * (*add_virtual_intf)(struct wiphy *wiphy,
-						  char *name,
+						  const char *name,
 						  enum nl80211_iftype type,
 						  u32 *flags,
 						  struct vif_params *params);
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 9bd56a744982..05f3a313db88 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -20,7 +20,8 @@
 #include "rate.h"
 #include "mesh.h"
 
-static struct wireless_dev *ieee80211_add_iface(struct wiphy *wiphy, char *name,
+static struct wireless_dev *ieee80211_add_iface(struct wiphy *wiphy,
+						const char *name,
 						enum nl80211_iftype type,
 						u32 *flags,
 						struct vif_params *params)
-- 
cgit v1.2.3


From 6b6e27255f29a6191ef8ad96bfcc392ab2ef6c71 Mon Sep 17 00:00:00 2001
From: stephen hemminger <shemminger@vyatta.com>
Date: Mon, 17 Sep 2012 10:03:26 +0000
Subject: netdev: make address const in device address management

The internal functions for add/deleting addresses don't change
their argument.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c |  4 +--
 drivers/net/macvlan.c                         |  4 +--
 include/linux/netdevice.h                     | 28 +++++++++----------
 net/bridge/br_fdb.c                           |  6 ++--
 net/bridge/br_private.h                       |  4 +--
 net/core/dev_addr_lists.c                     | 40 ++++++++++++++-------------
 6 files changed, 44 insertions(+), 42 deletions(-)

(limited to 'include')

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 2dc9d91e2b67..70d27a361857 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -6890,7 +6890,7 @@ static int ixgbe_set_features(struct net_device *netdev,
 
 static int ixgbe_ndo_fdb_add(struct ndmsg *ndm,
 			     struct net_device *dev,
-			     unsigned char *addr,
+			     const unsigned char *addr,
 			     u16 flags)
 {
 	struct ixgbe_adapter *adapter = netdev_priv(dev);
@@ -6927,7 +6927,7 @@ static int ixgbe_ndo_fdb_add(struct ndmsg *ndm,
 
 static int ixgbe_ndo_fdb_del(struct ndmsg *ndm,
 			     struct net_device *dev,
-			     unsigned char *addr)
+			     const unsigned char *addr)
 {
 	struct ixgbe_adapter *adapter = netdev_priv(dev);
 	int err = -EOPNOTSUPP;
diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index 66a9bfe7b1c8..815dfcfbc7b9 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -548,7 +548,7 @@ static int macvlan_vlan_rx_kill_vid(struct net_device *dev,
 
 static int macvlan_fdb_add(struct ndmsg *ndm,
 			   struct net_device *dev,
-			   unsigned char *addr,
+			   const unsigned char *addr,
 			   u16 flags)
 {
 	struct macvlan_dev *vlan = netdev_priv(dev);
@@ -567,7 +567,7 @@ static int macvlan_fdb_add(struct ndmsg *ndm,
 
 static int macvlan_fdb_del(struct ndmsg *ndm,
 			   struct net_device *dev,
-			   unsigned char *addr)
+			   const unsigned char *addr)
 {
 	struct macvlan_dev *vlan = netdev_priv(dev);
 	int err = -EINVAL;
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index ae3153c0db0a..82264e717e53 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -907,10 +907,10 @@ struct netdev_fcoe_hbainfo {
  *	Must return >0 or -errno if it changed dev->features itself.
  *
  * int (*ndo_fdb_add)(struct ndmsg *ndm, struct net_device *dev,
- *		      unsigned char *addr, u16 flags)
+ *		      const unsigned char *addr, u16 flags)
  *	Adds an FDB entry to dev for addr.
  * int (*ndo_fdb_del)(struct ndmsg *ndm, struct net_device *dev,
- *		      unsigned char *addr)
+ *		      const unsigned char *addr)
  *	Deletes the FDB entry from dev coresponding to addr.
  * int (*ndo_fdb_dump)(struct sk_buff *skb, struct netlink_callback *cb,
  *		       struct net_device *dev, int idx)
@@ -1017,11 +1017,11 @@ struct net_device_ops {
 
 	int			(*ndo_fdb_add)(struct ndmsg *ndm,
 					       struct net_device *dev,
-					       unsigned char *addr,
+					       const unsigned char *addr,
 					       u16 flags);
 	int			(*ndo_fdb_del)(struct ndmsg *ndm,
 					       struct net_device *dev,
-					       unsigned char *addr);
+					       const unsigned char *addr);
 	int			(*ndo_fdb_dump)(struct sk_buff *skb,
 						struct netlink_callback *cb,
 						struct net_device *dev,
@@ -2561,9 +2561,9 @@ extern void __hw_addr_flush(struct netdev_hw_addr_list *list);
 extern void __hw_addr_init(struct netdev_hw_addr_list *list);
 
 /* Functions used for device addresses handling */
-extern int dev_addr_add(struct net_device *dev, unsigned char *addr,
+extern int dev_addr_add(struct net_device *dev, const unsigned char *addr,
 			unsigned char addr_type);
-extern int dev_addr_del(struct net_device *dev, unsigned char *addr,
+extern int dev_addr_del(struct net_device *dev, const unsigned char *addr,
 			unsigned char addr_type);
 extern int dev_addr_add_multiple(struct net_device *to_dev,
 				 struct net_device *from_dev,
@@ -2575,20 +2575,20 @@ extern void dev_addr_flush(struct net_device *dev);
 extern int dev_addr_init(struct net_device *dev);
 
 /* Functions used for unicast addresses handling */
-extern int dev_uc_add(struct net_device *dev, unsigned char *addr);
-extern int dev_uc_add_excl(struct net_device *dev, unsigned char *addr);
-extern int dev_uc_del(struct net_device *dev, unsigned char *addr);
+extern int dev_uc_add(struct net_device *dev, const unsigned char *addr);
+extern int dev_uc_add_excl(struct net_device *dev, const unsigned char *addr);
+extern int dev_uc_del(struct net_device *dev, const unsigned char *addr);
 extern int dev_uc_sync(struct net_device *to, struct net_device *from);
 extern void dev_uc_unsync(struct net_device *to, struct net_device *from);
 extern void dev_uc_flush(struct net_device *dev);
 extern void dev_uc_init(struct net_device *dev);
 
 /* Functions used for multicast addresses handling */
-extern int dev_mc_add(struct net_device *dev, unsigned char *addr);
-extern int dev_mc_add_global(struct net_device *dev, unsigned char *addr);
-extern int dev_mc_add_excl(struct net_device *dev, unsigned char *addr);
-extern int dev_mc_del(struct net_device *dev, unsigned char *addr);
-extern int dev_mc_del_global(struct net_device *dev, unsigned char *addr);
+extern int dev_mc_add(struct net_device *dev, const unsigned char *addr);
+extern int dev_mc_add_global(struct net_device *dev, const unsigned char *addr);
+extern int dev_mc_add_excl(struct net_device *dev, const unsigned char *addr);
+extern int dev_mc_del(struct net_device *dev, const unsigned char *addr);
+extern int dev_mc_del_global(struct net_device *dev, const unsigned char *addr);
 extern int dev_mc_sync(struct net_device *to, struct net_device *from);
 extern void dev_mc_unsync(struct net_device *to, struct net_device *from);
 extern void dev_mc_flush(struct net_device *dev);
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index ddf93efc133c..02861190a3d4 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -609,7 +609,7 @@ static int fdb_add_entry(struct net_bridge_port *source, const __u8 *addr,
 
 /* Add new permanent fdb entry with RTM_NEWNEIGH */
 int br_fdb_add(struct ndmsg *ndm, struct net_device *dev,
-	       unsigned char *addr, u16 nlh_flags)
+	       const unsigned char *addr, u16 nlh_flags)
 {
 	struct net_bridge_port *p;
 	int err = 0;
@@ -639,7 +639,7 @@ int br_fdb_add(struct ndmsg *ndm, struct net_device *dev,
 	return err;
 }
 
-static int fdb_delete_by_addr(struct net_bridge_port *p, u8 *addr)
+static int fdb_delete_by_addr(struct net_bridge_port *p, const u8 *addr)
 {
 	struct net_bridge *br = p->br;
 	struct hlist_head *head = &br->hash[br_mac_hash(addr)];
@@ -655,7 +655,7 @@ static int fdb_delete_by_addr(struct net_bridge_port *p, u8 *addr)
 
 /* Remove neighbor entry with RTM_DELNEIGH */
 int br_fdb_delete(struct ndmsg *ndm, struct net_device *dev,
-		  unsigned char *addr)
+		  const unsigned char *addr)
 {
 	struct net_bridge_port *p;
 	int err;
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index f507d2af9646..11a984b87e62 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -363,10 +363,10 @@ extern void br_fdb_update(struct net_bridge *br,
 
 extern int br_fdb_delete(struct ndmsg *ndm,
 			 struct net_device *dev,
-			 unsigned char *addr);
+			 const unsigned char *addr);
 extern int br_fdb_add(struct ndmsg *nlh,
 		      struct net_device *dev,
-		      unsigned char *addr,
+		      const unsigned char *addr,
 		      u16 nlh_flags);
 extern int br_fdb_dump(struct sk_buff *skb,
 		       struct netlink_callback *cb,
diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c
index c4cc2bc49f06..87cc17db2d56 100644
--- a/net/core/dev_addr_lists.c
+++ b/net/core/dev_addr_lists.c
@@ -22,7 +22,7 @@
  */
 
 static int __hw_addr_create_ex(struct netdev_hw_addr_list *list,
-			       unsigned char *addr, int addr_len,
+			       const unsigned char *addr, int addr_len,
 			       unsigned char addr_type, bool global)
 {
 	struct netdev_hw_addr *ha;
@@ -46,7 +46,7 @@ static int __hw_addr_create_ex(struct netdev_hw_addr_list *list,
 }
 
 static int __hw_addr_add_ex(struct netdev_hw_addr_list *list,
-			    unsigned char *addr, int addr_len,
+			    const unsigned char *addr, int addr_len,
 			    unsigned char addr_type, bool global)
 {
 	struct netdev_hw_addr *ha;
@@ -72,14 +72,15 @@ static int __hw_addr_add_ex(struct netdev_hw_addr_list *list,
 	return __hw_addr_create_ex(list, addr, addr_len, addr_type, global);
 }
 
-static int __hw_addr_add(struct netdev_hw_addr_list *list, unsigned char *addr,
-			 int addr_len, unsigned char addr_type)
+static int __hw_addr_add(struct netdev_hw_addr_list *list,
+			 const unsigned char *addr, int addr_len,
+			 unsigned char addr_type)
 {
 	return __hw_addr_add_ex(list, addr, addr_len, addr_type, false);
 }
 
 static int __hw_addr_del_ex(struct netdev_hw_addr_list *list,
-			    unsigned char *addr, int addr_len,
+			    const unsigned char *addr, int addr_len,
 			    unsigned char addr_type, bool global)
 {
 	struct netdev_hw_addr *ha;
@@ -104,8 +105,9 @@ static int __hw_addr_del_ex(struct netdev_hw_addr_list *list,
 	return -ENOENT;
 }
 
-static int __hw_addr_del(struct netdev_hw_addr_list *list, unsigned char *addr,
-			 int addr_len, unsigned char addr_type)
+static int __hw_addr_del(struct netdev_hw_addr_list *list,
+			 const unsigned char *addr, int addr_len,
+			 unsigned char addr_type)
 {
 	return __hw_addr_del_ex(list, addr, addr_len, addr_type, false);
 }
@@ -278,7 +280,7 @@ EXPORT_SYMBOL(dev_addr_init);
  *
  *	The caller must hold the rtnl_mutex.
  */
-int dev_addr_add(struct net_device *dev, unsigned char *addr,
+int dev_addr_add(struct net_device *dev, const unsigned char *addr,
 		 unsigned char addr_type)
 {
 	int err;
@@ -303,7 +305,7 @@ EXPORT_SYMBOL(dev_addr_add);
  *
  *	The caller must hold the rtnl_mutex.
  */
-int dev_addr_del(struct net_device *dev, unsigned char *addr,
+int dev_addr_del(struct net_device *dev, const unsigned char *addr,
 		 unsigned char addr_type)
 {
 	int err;
@@ -390,7 +392,7 @@ EXPORT_SYMBOL(dev_addr_del_multiple);
  *	@dev: device
  *	@addr: address to add
  */
-int dev_uc_add_excl(struct net_device *dev, unsigned char *addr)
+int dev_uc_add_excl(struct net_device *dev, const unsigned char *addr)
 {
 	struct netdev_hw_addr *ha;
 	int err;
@@ -421,7 +423,7 @@ EXPORT_SYMBOL(dev_uc_add_excl);
  *	Add a secondary unicast address to the device or increase
  *	the reference count if it already exists.
  */
-int dev_uc_add(struct net_device *dev, unsigned char *addr)
+int dev_uc_add(struct net_device *dev, const unsigned char *addr)
 {
 	int err;
 
@@ -443,7 +445,7 @@ EXPORT_SYMBOL(dev_uc_add);
  *	Release reference to a secondary unicast address and remove it
  *	from the device if the reference count drops to zero.
  */
-int dev_uc_del(struct net_device *dev, unsigned char *addr)
+int dev_uc_del(struct net_device *dev, const unsigned char *addr)
 {
 	int err;
 
@@ -543,7 +545,7 @@ EXPORT_SYMBOL(dev_uc_init);
  *	@dev: device
  *	@addr: address to add
  */
-int dev_mc_add_excl(struct net_device *dev, unsigned char *addr)
+int dev_mc_add_excl(struct net_device *dev, const unsigned char *addr)
 {
 	struct netdev_hw_addr *ha;
 	int err;
@@ -566,7 +568,7 @@ out:
 }
 EXPORT_SYMBOL(dev_mc_add_excl);
 
-static int __dev_mc_add(struct net_device *dev, unsigned char *addr,
+static int __dev_mc_add(struct net_device *dev, const unsigned char *addr,
 			bool global)
 {
 	int err;
@@ -587,7 +589,7 @@ static int __dev_mc_add(struct net_device *dev, unsigned char *addr,
  *	Add a multicast address to the device or increase
  *	the reference count if it already exists.
  */
-int dev_mc_add(struct net_device *dev, unsigned char *addr)
+int dev_mc_add(struct net_device *dev, const unsigned char *addr)
 {
 	return __dev_mc_add(dev, addr, false);
 }
@@ -600,13 +602,13 @@ EXPORT_SYMBOL(dev_mc_add);
  *
  *	Add a global multicast address to the device.
  */
-int dev_mc_add_global(struct net_device *dev, unsigned char *addr)
+int dev_mc_add_global(struct net_device *dev, const unsigned char *addr)
 {
 	return __dev_mc_add(dev, addr, true);
 }
 EXPORT_SYMBOL(dev_mc_add_global);
 
-static int __dev_mc_del(struct net_device *dev, unsigned char *addr,
+static int __dev_mc_del(struct net_device *dev, const unsigned char *addr,
 			bool global)
 {
 	int err;
@@ -628,7 +630,7 @@ static int __dev_mc_del(struct net_device *dev, unsigned char *addr,
  *	Release reference to a multicast address and remove it
  *	from the device if the reference count drops to zero.
  */
-int dev_mc_del(struct net_device *dev, unsigned char *addr)
+int dev_mc_del(struct net_device *dev, const unsigned char *addr)
 {
 	return __dev_mc_del(dev, addr, false);
 }
@@ -642,7 +644,7 @@ EXPORT_SYMBOL(dev_mc_del);
  *	Release reference to a multicast address and remove it
  *	from the device if the reference count drops to zero.
  */
-int dev_mc_del_global(struct net_device *dev, unsigned char *addr)
+int dev_mc_del_global(struct net_device *dev, const unsigned char *addr)
 {
 	return __dev_mc_del(dev, addr, true);
 }
-- 
cgit v1.2.3


From 8c4c49df5cfeb8d56e5b85a430c8cbcb86c2ac37 Mon Sep 17 00:00:00 2001
From: Amerigo Wang <amwang@redhat.com>
Date: Mon, 17 Sep 2012 20:16:31 +0000
Subject: netpoll: call ->ndo_select_queue() in tx path

In netpoll tx path, we miss the chance of calling ->ndo_select_queue(),
thus could cause problems when bonding is involved.

This patch makes dev_pick_tx() extern (and rename it to netdev_pick_tx())
to let netpoll call it in netpoll_send_skb_on_dev().

Reported-by: Sylvain Munaut <s.munaut@whatever-company.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Eric Dumazet <edumazet@google.com>
Signed-off-by: Cong Wang <amwang@redhat.com>
Tested-by: Sylvain Munaut <s.munaut@whatever-company.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 3 +++
 net/core/dev.c            | 6 +++---
 net/core/netpoll.c        | 2 +-
 3 files changed, 7 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 82264e717e53..6c131f055ab0 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1403,6 +1403,9 @@ static inline void netdev_for_each_tx_queue(struct net_device *dev,
 		f(dev, &dev->_tx[i], arg);
 }
 
+extern struct netdev_queue *netdev_pick_tx(struct net_device *dev,
+					   struct sk_buff *skb);
+
 /*
  * Net namespace inlines
  */
diff --git a/net/core/dev.c b/net/core/dev.c
index bbda81997f4f..707b12425a79 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2396,8 +2396,8 @@ static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
 #endif
 }
 
-static struct netdev_queue *dev_pick_tx(struct net_device *dev,
-					struct sk_buff *skb)
+struct netdev_queue *netdev_pick_tx(struct net_device *dev,
+				    struct sk_buff *skb)
 {
 	int queue_index;
 	const struct net_device_ops *ops = dev->netdev_ops;
@@ -2571,7 +2571,7 @@ int dev_queue_xmit(struct sk_buff *skb)
 
 	skb_update_prio(skb);
 
-	txq = dev_pick_tx(dev, skb);
+	txq = netdev_pick_tx(dev, skb);
 	q = rcu_dereference_bh(txq->qdisc);
 
 #ifdef CONFIG_NET_CLS_ACT
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index dd67818025d1..77a0388fc3be 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -328,7 +328,7 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb,
 	if (skb_queue_len(&npinfo->txq) == 0 && !netpoll_owner_active(dev)) {
 		struct netdev_queue *txq;
 
-		txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));
+		txq = netdev_pick_tx(dev, skb);
 
 		/* try until next clock tick */
 		for (tries = jiffies_to_usecs(1)/USEC_PER_POLL;
-- 
cgit v1.2.3


From c038a767cd697238b09f7a4ea5a504b4891774e9 Mon Sep 17 00:00:00 2001
From: Amerigo Wang <amwang@redhat.com>
Date: Tue, 18 Sep 2012 16:50:08 +0000
Subject: ipv6: add a new namespace for nf_conntrack_reasm
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

As pointed by Michal, it is necessary to add a new
namespace for nf_conntrack_reasm code, this prepares
for the second patch.

Cc: Herbert Xu <herbert@gondor.apana.org.au>
Cc: Michal Kubeček <mkubecek@suse.cz>
Cc: David Miller <davem@davemloft.net>
Cc: Patrick McHardy <kaber@trash.net>
Cc: Pablo Neira Ayuso <pablo@netfilter.org>
Cc: netfilter-devel@vger.kernel.org
Signed-off-by: Cong Wang <amwang@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/net_namespace.h             |   3 +
 include/net/netns/ipv6.h                |   8 ++
 net/ipv6/netfilter/nf_conntrack_reasm.c | 137 ++++++++++++++++++++++----------
 3 files changed, 106 insertions(+), 42 deletions(-)

(limited to 'include')

diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index 5ae57f1ab755..d61e2b36d9e3 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -92,6 +92,9 @@ struct net {
 	struct netns_xt		xt;
 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
 	struct netns_ct		ct;
+#endif
+#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
+	struct netns_nf_frag	nf_frag;
 #endif
 	struct sock		*nfnl;
 	struct sock		*nfnl_stash;
diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h
index 0318104a9458..214cb0a53359 100644
--- a/include/net/netns/ipv6.h
+++ b/include/net/netns/ipv6.h
@@ -71,4 +71,12 @@ struct netns_ipv6 {
 #endif
 #endif
 };
+
+#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
+struct netns_nf_frag {
+	struct netns_sysctl_ipv6 sysctl;
+	struct netns_frags	frags;
+};
+#endif
+
 #endif
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index f94fb3ac2a79..f40f327ccc0c 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -71,27 +71,26 @@ struct nf_ct_frag6_queue
 };
 
 static struct inet_frags nf_frags;
-static struct netns_frags nf_init_frags;
 
 #ifdef CONFIG_SYSCTL
 static struct ctl_table nf_ct_frag6_sysctl_table[] = {
 	{
 		.procname	= "nf_conntrack_frag6_timeout",
-		.data		= &nf_init_frags.timeout,
+		.data		= &init_net.nf_frag.frags.timeout,
 		.maxlen		= sizeof(unsigned int),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec_jiffies,
 	},
 	{
 		.procname	= "nf_conntrack_frag6_low_thresh",
-		.data		= &nf_init_frags.low_thresh,
+		.data		= &init_net.nf_frag.frags.low_thresh,
 		.maxlen		= sizeof(unsigned int),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec,
 	},
 	{
 		.procname	= "nf_conntrack_frag6_high_thresh",
-		.data		= &nf_init_frags.high_thresh,
+		.data		= &init_net.nf_frag.frags.high_thresh,
 		.maxlen		= sizeof(unsigned int),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec,
@@ -99,7 +98,55 @@ static struct ctl_table nf_ct_frag6_sysctl_table[] = {
 	{ }
 };
 
-static struct ctl_table_header *nf_ct_frag6_sysctl_header;
+static int __net_init nf_ct_frag6_sysctl_register(struct net *net)
+{
+	struct ctl_table *table;
+	struct ctl_table_header *hdr;
+
+	table = nf_ct_frag6_sysctl_table;
+	if (!net_eq(net, &init_net)) {
+		table = kmemdup(table, sizeof(nf_ct_frag6_sysctl_table),
+				GFP_KERNEL);
+		if (table == NULL)
+			goto err_alloc;
+
+		table[0].data = &net->ipv6.frags.high_thresh;
+		table[1].data = &net->ipv6.frags.low_thresh;
+		table[2].data = &net->ipv6.frags.timeout;
+	}
+
+	hdr = register_net_sysctl(net, "net/netfilter", table);
+	if (hdr == NULL)
+		goto err_reg;
+
+	net->ipv6.sysctl.frags_hdr = hdr;
+	return 0;
+
+err_reg:
+	if (!net_eq(net, &init_net))
+		kfree(table);
+err_alloc:
+	return -ENOMEM;
+}
+
+static void __net_exit nf_ct_frags6_sysctl_unregister(struct net *net)
+{
+	struct ctl_table *table;
+
+	table = net->nf_frag.sysctl.frags_hdr->ctl_table_arg;
+	unregister_net_sysctl_table(net->nf_frag.sysctl.frags_hdr);
+	if (!net_eq(net, &init_net))
+		kfree(table);
+}
+
+#else
+static int __net_init nf_ct_frag6_sysctl_register(struct net *net)
+{
+	return 0;
+}
+static void __net_exit nf_ct_frags6_sysctl_unregister(struct net *net)
+{
+}
 #endif
 
 static unsigned int nf_hashfn(struct inet_frag_queue *q)
@@ -131,13 +178,6 @@ static __inline__ void fq_kill(struct nf_ct_frag6_queue *fq)
 	inet_frag_kill(&fq->q, &nf_frags);
 }
 
-static void nf_ct_frag6_evictor(void)
-{
-	local_bh_disable();
-	inet_frag_evictor(&nf_init_frags, &nf_frags);
-	local_bh_enable();
-}
-
 static void nf_ct_frag6_expire(unsigned long data)
 {
 	struct nf_ct_frag6_queue *fq;
@@ -158,9 +198,9 @@ out:
 }
 
 /* Creation primitives. */
-
-static __inline__ struct nf_ct_frag6_queue *
-fq_find(__be32 id, u32 user, struct in6_addr *src, struct in6_addr *dst)
+static inline struct nf_ct_frag6_queue *fq_find(struct net *net, __be32 id,
+						u32 user, struct in6_addr *src,
+						struct in6_addr *dst)
 {
 	struct inet_frag_queue *q;
 	struct ip6_create_arg arg;
@@ -174,7 +214,7 @@ fq_find(__be32 id, u32 user, struct in6_addr *src, struct in6_addr *dst)
 	read_lock_bh(&nf_frags.lock);
 	hash = inet6_hash_frag(id, src, dst, nf_frags.rnd);
 
-	q = inet_frag_find(&nf_init_frags, &nf_frags, &arg, hash);
+	q = inet_frag_find(&net->nf_frag.frags, &nf_frags, &arg, hash);
 	local_bh_enable();
 	if (q == NULL)
 		goto oom;
@@ -312,7 +352,7 @@ found:
 	fq->q.meat += skb->len;
 	if (payload_len > fq->q.max_size)
 		fq->q.max_size = payload_len;
-	atomic_add(skb->truesize, &nf_init_frags.mem);
+	atomic_add(skb->truesize, &fq->q.net->mem);
 
 	/* The first fragment.
 	 * nhoffset is obtained from the first fragment, of course.
@@ -322,7 +362,7 @@ found:
 		fq->q.last_in |= INET_FRAG_FIRST_IN;
 	}
 	write_lock(&nf_frags.lock);
-	list_move_tail(&fq->q.lru_list, &nf_init_frags.lru_list);
+	list_move_tail(&fq->q.lru_list, &fq->q.net->lru_list);
 	write_unlock(&nf_frags.lock);
 	return 0;
 
@@ -391,7 +431,7 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev)
 		clone->ip_summed = head->ip_summed;
 
 		NFCT_FRAG6_CB(clone)->orig = NULL;
-		atomic_add(clone->truesize, &nf_init_frags.mem);
+		atomic_add(clone->truesize, &fq->q.net->mem);
 	}
 
 	/* We have to remove fragment header from datagram and to relocate
@@ -415,7 +455,7 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev)
 			head->csum = csum_add(head->csum, fp->csum);
 		head->truesize += fp->truesize;
 	}
-	atomic_sub(head->truesize, &nf_init_frags.mem);
+	atomic_sub(head->truesize, &fq->q.net->mem);
 
 	head->local_df = 1;
 	head->next = NULL;
@@ -527,6 +567,8 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb, u32 user)
 {
 	struct sk_buff *clone;
 	struct net_device *dev = skb->dev;
+	struct net *net = skb_dst(skb) ? dev_net(skb_dst(skb)->dev)
+				       : dev_net(skb->dev);
 	struct frag_hdr *fhdr;
 	struct nf_ct_frag6_queue *fq;
 	struct ipv6hdr *hdr;
@@ -560,10 +602,13 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb, u32 user)
 	hdr = ipv6_hdr(clone);
 	fhdr = (struct frag_hdr *)skb_transport_header(clone);
 
-	if (atomic_read(&nf_init_frags.mem) > nf_init_frags.high_thresh)
-		nf_ct_frag6_evictor();
+	if (atomic_read(&net->nf_frag.frags.mem) > net->nf_frag.frags.high_thresh) {
+		local_bh_disable();
+		inet_frag_evictor(&net->nf_frag.frags, &nf_frags);
+		local_bh_enable();
+	}
 
-	fq = fq_find(fhdr->identification, user, &hdr->saddr, &hdr->daddr);
+	fq = fq_find(net, fhdr->identification, user, &hdr->saddr, &hdr->daddr);
 	if (fq == NULL) {
 		pr_debug("Can't find and can't create new queue\n");
 		goto ret_orig;
@@ -621,8 +666,31 @@ void nf_ct_frag6_output(unsigned int hooknum, struct sk_buff *skb,
 	nf_conntrack_put_reasm(skb);
 }
 
+static int nf_ct_net_init(struct net *net)
+{
+	net->nf_frag.frags.high_thresh = IPV6_FRAG_HIGH_THRESH;
+	net->nf_frag.frags.low_thresh = IPV6_FRAG_LOW_THRESH;
+	net->nf_frag.frags.timeout = IPV6_FRAG_TIMEOUT;
+	inet_frags_init_net(&net->nf_frag.frags);
+
+	return nf_ct_frag6_sysctl_register(net);
+}
+
+static void nf_ct_net_exit(struct net *net)
+{
+	nf_ct_frags6_sysctl_unregister(net);
+	inet_frags_exit_net(&net->nf_frag.frags, &nf_frags);
+}
+
+static struct pernet_operations nf_ct_net_ops = {
+	.init = nf_ct_net_init,
+	.exit = nf_ct_net_exit,
+};
+
 int nf_ct_frag6_init(void)
 {
+	int ret = 0;
+
 	nf_frags.hashfn = nf_hashfn;
 	nf_frags.constructor = ip6_frag_init;
 	nf_frags.destructor = NULL;
@@ -631,32 +699,17 @@ int nf_ct_frag6_init(void)
 	nf_frags.match = ip6_frag_match;
 	nf_frags.frag_expire = nf_ct_frag6_expire;
 	nf_frags.secret_interval = 10 * 60 * HZ;
-	nf_init_frags.timeout = IPV6_FRAG_TIMEOUT;
-	nf_init_frags.high_thresh = IPV6_FRAG_HIGH_THRESH;
-	nf_init_frags.low_thresh = IPV6_FRAG_LOW_THRESH;
-	inet_frags_init_net(&nf_init_frags);
 	inet_frags_init(&nf_frags);
 
-#ifdef CONFIG_SYSCTL
-	nf_ct_frag6_sysctl_header = register_net_sysctl(&init_net, "net/netfilter",
-							nf_ct_frag6_sysctl_table);
-	if (!nf_ct_frag6_sysctl_header) {
+	ret = register_pernet_subsys(&nf_ct_net_ops);
+	if (ret)
 		inet_frags_fini(&nf_frags);
-		return -ENOMEM;
-	}
-#endif
 
-	return 0;
+	return ret;
 }
 
 void nf_ct_frag6_cleanup(void)
 {
-#ifdef CONFIG_SYSCTL
-	unregister_net_sysctl_table(nf_ct_frag6_sysctl_header);
-	nf_ct_frag6_sysctl_header = NULL;
-#endif
+	unregister_pernet_subsys(&nf_ct_net_ops);
 	inet_frags_fini(&nf_frags);
-
-	nf_init_frags.low_thresh = 0;
-	nf_ct_frag6_evictor();
 }
-- 
cgit v1.2.3


From b836c99fd6c9dfe52a69fa0ba36ec918f80ce02a Mon Sep 17 00:00:00 2001
From: Amerigo Wang <amwang@redhat.com>
Date: Tue, 18 Sep 2012 16:50:09 +0000
Subject: ipv6: unify conntrack reassembly expire code with standard one
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Two years ago, Shan Wei tried to fix this:
http://patchwork.ozlabs.org/patch/43905/

The problem is that RFC2460 requires an ICMP Time
Exceeded -- Fragment Reassembly Time Exceeded message should be
sent to the source of that fragment, if the defragmentation
times out.

"
   If insufficient fragments are received to complete reassembly of a
   packet within 60 seconds of the reception of the first-arriving
   fragment of that packet, reassembly of that packet must be
   abandoned and all the fragments that have been received for that
   packet must be discarded.  If the first fragment (i.e., the one
   with a Fragment Offset of zero) has been received, an ICMP Time
   Exceeded -- Fragment Reassembly Time Exceeded message should be
   sent to the source of that fragment.
"

As Herbert suggested, we could actually use the standard IPv6
reassembly code which follows RFC2460.

With this patch applied, I can see ICMP Time Exceeded sent
from the receiver when the sender sent out 3/4 fragmented
IPv6 UDP packet.

Cc: Herbert Xu <herbert@gondor.apana.org.au>
Cc: Michal Kubeček <mkubecek@suse.cz>
Cc: David Miller <davem@davemloft.net>
Cc: Hideaki YOSHIFUJI <yoshfuji@linux-ipv6.org>
Cc: Patrick McHardy <kaber@trash.net>
Cc: Pablo Neira Ayuso <pablo@netfilter.org>
Cc: netfilter-devel@vger.kernel.org
Signed-off-by: Cong Wang <amwang@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ipv6.h                      | 19 +++++++++
 net/ipv6/netfilter/nf_conntrack_reasm.c | 74 +++++++++------------------------
 net/ipv6/reassembly.c                   | 63 +++++++++-------------------
 3 files changed, 57 insertions(+), 99 deletions(-)

(limited to 'include')

diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 9bed5d483405..81d4455f6e14 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -411,6 +411,25 @@ struct ip6_create_arg {
 void ip6_frag_init(struct inet_frag_queue *q, void *a);
 bool ip6_frag_match(struct inet_frag_queue *q, void *a);
 
+/*
+ *	Equivalent of ipv4 struct ip
+ */
+struct frag_queue {
+	struct inet_frag_queue	q;
+
+	__be32			id;		/* fragment id		*/
+	u32			user;
+	struct in6_addr		saddr;
+	struct in6_addr		daddr;
+
+	int			iif;
+	unsigned int		csum;
+	__u16			nhoffset;
+};
+
+void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq,
+			   struct inet_frags *frags);
+
 static inline bool ipv6_addr_any(const struct in6_addr *a)
 {
 #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index f40f327ccc0c..54274c33a0b1 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -57,19 +57,6 @@ struct nf_ct_frag6_skb_cb
 
 #define NFCT_FRAG6_CB(skb)	((struct nf_ct_frag6_skb_cb*)((skb)->cb))
 
-struct nf_ct_frag6_queue
-{
-	struct inet_frag_queue	q;
-
-	__be32			id;		/* fragment id		*/
-	u32			user;
-	struct in6_addr		saddr;
-	struct in6_addr		daddr;
-
-	unsigned int		csum;
-	__u16			nhoffset;
-};
-
 static struct inet_frags nf_frags;
 
 #ifdef CONFIG_SYSCTL
@@ -151,9 +138,9 @@ static void __net_exit nf_ct_frags6_sysctl_unregister(struct net *net)
 
 static unsigned int nf_hashfn(struct inet_frag_queue *q)
 {
-	const struct nf_ct_frag6_queue *nq;
+	const struct frag_queue *nq;
 
-	nq = container_of(q, struct nf_ct_frag6_queue, q);
+	nq = container_of(q, struct frag_queue, q);
 	return inet6_hash_frag(nq->id, &nq->saddr, &nq->daddr, nf_frags.rnd);
 }
 
@@ -163,44 +150,21 @@ static void nf_skb_free(struct sk_buff *skb)
 		kfree_skb(NFCT_FRAG6_CB(skb)->orig);
 }
 
-/* Destruction primitives. */
-
-static __inline__ void fq_put(struct nf_ct_frag6_queue *fq)
-{
-	inet_frag_put(&fq->q, &nf_frags);
-}
-
-/* Kill fq entry. It is not destroyed immediately,
- * because caller (and someone more) holds reference count.
- */
-static __inline__ void fq_kill(struct nf_ct_frag6_queue *fq)
-{
-	inet_frag_kill(&fq->q, &nf_frags);
-}
-
 static void nf_ct_frag6_expire(unsigned long data)
 {
-	struct nf_ct_frag6_queue *fq;
-
-	fq = container_of((struct inet_frag_queue *)data,
-			struct nf_ct_frag6_queue, q);
+	struct frag_queue *fq;
+	struct net *net;
 
-	spin_lock(&fq->q.lock);
+	fq = container_of((struct inet_frag_queue *)data, struct frag_queue, q);
+	net = container_of(fq->q.net, struct net, nf_frag.frags);
 
-	if (fq->q.last_in & INET_FRAG_COMPLETE)
-		goto out;
-
-	fq_kill(fq);
-
-out:
-	spin_unlock(&fq->q.lock);
-	fq_put(fq);
+	ip6_expire_frag_queue(net, fq, &nf_frags);
 }
 
 /* Creation primitives. */
-static inline struct nf_ct_frag6_queue *fq_find(struct net *net, __be32 id,
-						u32 user, struct in6_addr *src,
-						struct in6_addr *dst)
+static inline struct frag_queue *fq_find(struct net *net, __be32 id,
+					 u32 user, struct in6_addr *src,
+					 struct in6_addr *dst)
 {
 	struct inet_frag_queue *q;
 	struct ip6_create_arg arg;
@@ -219,14 +183,14 @@ static inline struct nf_ct_frag6_queue *fq_find(struct net *net, __be32 id,
 	if (q == NULL)
 		goto oom;
 
-	return container_of(q, struct nf_ct_frag6_queue, q);
+	return container_of(q, struct frag_queue, q);
 
 oom:
 	return NULL;
 }
 
 
-static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb,
+static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb,
 			     const struct frag_hdr *fhdr, int nhoff)
 {
 	struct sk_buff *prev, *next;
@@ -367,7 +331,7 @@ found:
 	return 0;
 
 discard_fq:
-	fq_kill(fq);
+	inet_frag_kill(&fq->q, &nf_frags);
 err:
 	return -1;
 }
@@ -382,12 +346,12 @@ err:
  *	the last and the first frames arrived and all the bits are here.
  */
 static struct sk_buff *
-nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev)
+nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev)
 {
 	struct sk_buff *fp, *op, *head = fq->q.fragments;
 	int    payload_len;
 
-	fq_kill(fq);
+	inet_frag_kill(&fq->q, &nf_frags);
 
 	WARN_ON(head == NULL);
 	WARN_ON(NFCT_FRAG6_CB(head)->offset != 0);
@@ -570,7 +534,7 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb, u32 user)
 	struct net *net = skb_dst(skb) ? dev_net(skb_dst(skb)->dev)
 				       : dev_net(skb->dev);
 	struct frag_hdr *fhdr;
-	struct nf_ct_frag6_queue *fq;
+	struct frag_queue *fq;
 	struct ipv6hdr *hdr;
 	int fhoff, nhoff;
 	u8 prevhdr;
@@ -619,7 +583,7 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb, u32 user)
 	if (nf_ct_frag6_queue(fq, clone, fhdr, nhoff) < 0) {
 		spin_unlock_bh(&fq->q.lock);
 		pr_debug("Can't insert skb to queue\n");
-		fq_put(fq);
+		inet_frag_put(&fq->q, &nf_frags);
 		goto ret_orig;
 	}
 
@@ -631,7 +595,7 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb, u32 user)
 	}
 	spin_unlock_bh(&fq->q.lock);
 
-	fq_put(fq);
+	inet_frag_put(&fq->q, &nf_frags);
 	return ret_skb;
 
 ret_orig:
@@ -695,7 +659,7 @@ int nf_ct_frag6_init(void)
 	nf_frags.constructor = ip6_frag_init;
 	nf_frags.destructor = NULL;
 	nf_frags.skb_free = nf_skb_free;
-	nf_frags.qsize = sizeof(struct nf_ct_frag6_queue);
+	nf_frags.qsize = sizeof(struct frag_queue);
 	nf_frags.match = ip6_frag_match;
 	nf_frags.frag_expire = nf_ct_frag6_expire;
 	nf_frags.secret_interval = 10 * 60 * HZ;
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 4ff9af628e72..0ee553354ed5 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -65,24 +65,6 @@ struct ip6frag_skb_cb
 #define FRAG6_CB(skb)	((struct ip6frag_skb_cb*)((skb)->cb))
 
 
-/*
- *	Equivalent of ipv4 struct ipq
- */
-
-struct frag_queue
-{
-	struct inet_frag_queue	q;
-
-	__be32			id;		/* fragment id		*/
-	u32			user;
-	struct in6_addr		saddr;
-	struct in6_addr		daddr;
-
-	int			iif;
-	unsigned int		csum;
-	__u16			nhoffset;
-};
-
 static struct inet_frags ip6_frags;
 
 int ip6_frag_nqueues(struct net *net)
@@ -159,21 +141,6 @@ void ip6_frag_init(struct inet_frag_queue *q, void *a)
 }
 EXPORT_SYMBOL(ip6_frag_init);
 
-/* Destruction primitives. */
-
-static __inline__ void fq_put(struct frag_queue *fq)
-{
-	inet_frag_put(&fq->q, &ip6_frags);
-}
-
-/* Kill fq entry. It is not destroyed immediately,
- * because caller (and someone more) holds reference count.
- */
-static __inline__ void fq_kill(struct frag_queue *fq)
-{
-	inet_frag_kill(&fq->q, &ip6_frags);
-}
-
 static void ip6_evictor(struct net *net, struct inet6_dev *idev)
 {
 	int evicted;
@@ -183,22 +150,18 @@ static void ip6_evictor(struct net *net, struct inet6_dev *idev)
 		IP6_ADD_STATS_BH(net, idev, IPSTATS_MIB_REASMFAILS, evicted);
 }
 
-static void ip6_frag_expire(unsigned long data)
+void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq,
+			   struct inet_frags *frags)
 {
-	struct frag_queue *fq;
 	struct net_device *dev = NULL;
-	struct net *net;
-
-	fq = container_of((struct inet_frag_queue *)data, struct frag_queue, q);
 
 	spin_lock(&fq->q.lock);
 
 	if (fq->q.last_in & INET_FRAG_COMPLETE)
 		goto out;
 
-	fq_kill(fq);
+	inet_frag_kill(&fq->q, frags);
 
-	net = container_of(fq->q.net, struct net, ipv6.frags);
 	rcu_read_lock();
 	dev = dev_get_by_index_rcu(net, fq->iif);
 	if (!dev)
@@ -222,7 +185,19 @@ out_rcu_unlock:
 	rcu_read_unlock();
 out:
 	spin_unlock(&fq->q.lock);
-	fq_put(fq);
+	inet_frag_put(&fq->q, frags);
+}
+EXPORT_SYMBOL(ip6_expire_frag_queue);
+
+static void ip6_frag_expire(unsigned long data)
+{
+	struct frag_queue *fq;
+	struct net *net;
+
+	fq = container_of((struct inet_frag_queue *)data, struct frag_queue, q);
+	net = container_of(fq->q.net, struct net, ipv6.frags);
+
+	ip6_expire_frag_queue(net, fq, &ip6_frags);
 }
 
 static __inline__ struct frag_queue *
@@ -391,7 +366,7 @@ found:
 	return -1;
 
 discard_fq:
-	fq_kill(fq);
+	inet_frag_kill(&fq->q, &ip6_frags);
 err:
 	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
 		      IPSTATS_MIB_REASMFAILS);
@@ -417,7 +392,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
 	unsigned int nhoff;
 	int sum_truesize;
 
-	fq_kill(fq);
+	inet_frag_kill(&fq->q, &ip6_frags);
 
 	/* Make the one we just received the head. */
 	if (prev) {
@@ -586,7 +561,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
 		ret = ip6_frag_queue(fq, skb, fhdr, IP6CB(skb)->nhoff);
 
 		spin_unlock(&fq->q.lock);
-		fq_put(fq);
+		inet_frag_put(&fq->q, &ip6_frags);
 		return ret;
 	}
 
-- 
cgit v1.2.3


From d4915c087f7c2457c580efc16fe0bfa1a576274d Mon Sep 17 00:00:00 2001
From: Amerigo Wang <amwang@redhat.com>
Date: Tue, 18 Sep 2012 16:50:10 +0000
Subject: ipv6: make ip6_frag_nqueues() and ip6_frag_mem() static inline
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Cc: Herbert Xu <herbert@gondor.apana.org.au>
Cc: Michal Kubeček <mkubecek@suse.cz>
Cc: David Miller <davem@davemloft.net>
Signed-off-by: Cong Wang <amwang@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ipv6.h    | 13 +++++++++++--
 net/ipv6/reassembly.c | 10 ----------
 2 files changed, 11 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 81d4455f6e14..979bf6c13141 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -271,8 +271,17 @@ struct ipv6_txoptions *ipv6_fixup_options(struct ipv6_txoptions *opt_space,
 
 extern bool ipv6_opt_accepted(const struct sock *sk, const struct sk_buff *skb);
 
-int ip6_frag_nqueues(struct net *net);
-int ip6_frag_mem(struct net *net);
+#if IS_ENABLED(CONFIG_IPV6)
+static inline int ip6_frag_nqueues(struct net *net)
+{
+	return net->ipv6.frags.nqueues;
+}
+
+static inline int ip6_frag_mem(struct net *net)
+{
+	return atomic_read(&net->ipv6.frags.mem);
+}
+#endif
 
 #define IPV6_FRAG_HIGH_THRESH	(256 * 1024)	/* 262144 */
 #define IPV6_FRAG_LOW_THRESH	(192 * 1024)	/* 196608 */
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 0ee553354ed5..cf74f4e79356 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -67,16 +67,6 @@ struct ip6frag_skb_cb
 
 static struct inet_frags ip6_frags;
 
-int ip6_frag_nqueues(struct net *net)
-{
-	return net->ipv6.frags.nqueues;
-}
-
-int ip6_frag_mem(struct net *net)
-{
-	return atomic_read(&net->ipv6.frags.mem);
-}
-
 static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
 			  struct net_device *dev);
 
-- 
cgit v1.2.3


From 6b102865e7ba9ff1e3c49c32c7187bb427d91798 Mon Sep 17 00:00:00 2001
From: Amerigo Wang <amwang@redhat.com>
Date: Tue, 18 Sep 2012 16:50:11 +0000
Subject: ipv6: unify fragment thresh handling code
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Cc: Herbert Xu <herbert@gondor.apana.org.au>
Cc: Michal Kubeček <mkubecek@suse.cz>
Cc: David Miller <davem@davemloft.net>
Signed-off-by: Cong Wang <amwang@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inet_frag.h                 |  2 +-
 net/ipv4/inet_fragment.c                |  9 +++++++--
 net/ipv4/ip_fragment.c                  |  5 ++---
 net/ipv6/netfilter/nf_conntrack_reasm.c |  8 +++-----
 net/ipv6/reassembly.c                   | 16 +++++-----------
 5 files changed, 18 insertions(+), 22 deletions(-)

(limited to 'include')

diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h
index 5098ee7b7e0e..32786a044718 100644
--- a/include/net/inet_frag.h
+++ b/include/net/inet_frag.h
@@ -61,7 +61,7 @@ void inet_frags_exit_net(struct netns_frags *nf, struct inet_frags *f);
 void inet_frag_kill(struct inet_frag_queue *q, struct inet_frags *f);
 void inet_frag_destroy(struct inet_frag_queue *q,
 				struct inet_frags *f, int *work);
-int inet_frag_evictor(struct netns_frags *nf, struct inet_frags *f);
+int inet_frag_evictor(struct netns_frags *nf, struct inet_frags *f, bool force);
 struct inet_frag_queue *inet_frag_find(struct netns_frags *nf,
 		struct inet_frags *f, void *key, unsigned int hash)
 	__releases(&f->lock);
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index 85190e69297b..4750d2b74d79 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -89,7 +89,7 @@ void inet_frags_exit_net(struct netns_frags *nf, struct inet_frags *f)
 	nf->low_thresh = 0;
 
 	local_bh_disable();
-	inet_frag_evictor(nf, f);
+	inet_frag_evictor(nf, f, true);
 	local_bh_enable();
 }
 EXPORT_SYMBOL(inet_frags_exit_net);
@@ -158,11 +158,16 @@ void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f,
 }
 EXPORT_SYMBOL(inet_frag_destroy);
 
-int inet_frag_evictor(struct netns_frags *nf, struct inet_frags *f)
+int inet_frag_evictor(struct netns_frags *nf, struct inet_frags *f, bool force)
 {
 	struct inet_frag_queue *q;
 	int work, evicted = 0;
 
+	if (!force) {
+		if (atomic_read(&nf->mem) <= nf->high_thresh)
+			return 0;
+	}
+
 	work = atomic_read(&nf->mem) - nf->low_thresh;
 	while (work > 0) {
 		read_lock(&f->lock);
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index fa6a12c51066..448e68546827 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -219,7 +219,7 @@ static void ip_evictor(struct net *net)
 {
 	int evicted;
 
-	evicted = inet_frag_evictor(&net->ipv4.frags, &ip4_frags);
+	evicted = inet_frag_evictor(&net->ipv4.frags, &ip4_frags, false);
 	if (evicted)
 		IP_ADD_STATS_BH(net, IPSTATS_MIB_REASMFAILS, evicted);
 }
@@ -684,8 +684,7 @@ int ip_defrag(struct sk_buff *skb, u32 user)
 	IP_INC_STATS_BH(net, IPSTATS_MIB_REASMREQDS);
 
 	/* Start by cleaning up the memory. */
-	if (atomic_read(&net->ipv4.frags.mem) > net->ipv4.frags.high_thresh)
-		ip_evictor(net);
+	ip_evictor(net);
 
 	/* Lookup (or create) queue header */
 	if ((qp = ip_find(net, ip_hdr(skb), user)) != NULL) {
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 54274c33a0b1..1af12fde08df 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -566,11 +566,9 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb, u32 user)
 	hdr = ipv6_hdr(clone);
 	fhdr = (struct frag_hdr *)skb_transport_header(clone);
 
-	if (atomic_read(&net->nf_frag.frags.mem) > net->nf_frag.frags.high_thresh) {
-		local_bh_disable();
-		inet_frag_evictor(&net->nf_frag.frags, &nf_frags);
-		local_bh_enable();
-	}
+	local_bh_disable();
+	inet_frag_evictor(&net->nf_frag.frags, &nf_frags, false);
+	local_bh_enable();
 
 	fq = fq_find(net, fhdr->identification, user, &hdr->saddr, &hdr->daddr);
 	if (fq == NULL) {
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index cf74f4e79356..da8a4e301b1b 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -131,15 +131,6 @@ void ip6_frag_init(struct inet_frag_queue *q, void *a)
 }
 EXPORT_SYMBOL(ip6_frag_init);
 
-static void ip6_evictor(struct net *net, struct inet6_dev *idev)
-{
-	int evicted;
-
-	evicted = inet_frag_evictor(&net->ipv6.frags, &ip6_frags);
-	if (evicted)
-		IP6_ADD_STATS_BH(net, idev, IPSTATS_MIB_REASMFAILS, evicted);
-}
-
 void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq,
 			   struct inet_frags *frags)
 {
@@ -515,6 +506,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
 	struct frag_queue *fq;
 	const struct ipv6hdr *hdr = ipv6_hdr(skb);
 	struct net *net = dev_net(skb_dst(skb)->dev);
+	int evicted;
 
 	IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMREQDS);
 
@@ -539,8 +531,10 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
 		return 1;
 	}
 
-	if (atomic_read(&net->ipv6.frags.mem) > net->ipv6.frags.high_thresh)
-		ip6_evictor(net, ip6_dst_idev(skb_dst(skb)));
+	evicted = inet_frag_evictor(&net->ipv6.frags, &ip6_frags, false);
+	if (evicted)
+		IP6_ADD_STATS_BH(net, ip6_dst_idev(skb_dst(skb)),
+				 IPSTATS_MIB_REASMFAILS, evicted);
 
 	fq = fq_find(net, fhdr->identification, &hdr->saddr, &hdr->daddr);
 	if (fq != NULL) {
-- 
cgit v1.2.3


From 9baa0b0364103dd726384c71db30b74044754743 Mon Sep 17 00:00:00 2001
From: Or Gerlitz <ogerlitz@mellanox.com>
Date: Thu, 13 Sep 2012 05:56:36 +0000
Subject: IB/ipoib: Add rtnl_link_ops support

Add rtnl_link_ops to IPoIB, with the first usage being child device
create/delete through them. Childs devices are now either legacy ones,
created/deleted through the ipoib sysfs entries, or RTNL ones.

Adding support for RTNL childs involved refactoring of ipoib_vlan_add
which is now used by both the sysfs and the link_ops code.

Also, added ndo_uninit entry to support calling unregister_netdevice_queue
from the rtnl dellink entry. This required removal of calls to
ipoib_dev_cleanup from the driver in flows which use unregister_netdevice,
since the networking core will invoke ipoib_uninit which does exactly that.

Signed-off-by: Erez Shitrit <erezsh@mellanox.co.il>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/infiniband/ipoib.txt           |   3 +
 drivers/infiniband/ulp/ipoib/Makefile        |   3 +-
 drivers/infiniband/ulp/ipoib/ipoib.h         |  13 +++
 drivers/infiniband/ulp/ipoib/ipoib_main.c    |  25 ++++--
 drivers/infiniband/ulp/ipoib/ipoib_netlink.c | 114 +++++++++++++++++++++++++++
 drivers/infiniband/ulp/ipoib/ipoib_vlan.c    | 102 +++++++++++++-----------
 include/linux/if_link.h                      |  11 +++
 7 files changed, 220 insertions(+), 51 deletions(-)
 create mode 100644 drivers/infiniband/ulp/ipoib/ipoib_netlink.c

(limited to 'include')

diff --git a/Documentation/infiniband/ipoib.txt b/Documentation/infiniband/ipoib.txt
index 64eeb55d0c09..f2cfe265e836 100644
--- a/Documentation/infiniband/ipoib.txt
+++ b/Documentation/infiniband/ipoib.txt
@@ -24,6 +24,9 @@ Partitions and P_Keys
   The P_Key for any interface is given by the "pkey" file, and the
   main interface for a subinterface is in "parent."
 
+  Child interface create/delete can also be done using IPoIB's
+  rtnl_link_ops, where childs created using either way behave the same.
+
 Datagram vs Connected modes
 
   The IPoIB driver supports two modes of operation: datagram and
diff --git a/drivers/infiniband/ulp/ipoib/Makefile b/drivers/infiniband/ulp/ipoib/Makefile
index 3090100f0de7..e5430dd50764 100644
--- a/drivers/infiniband/ulp/ipoib/Makefile
+++ b/drivers/infiniband/ulp/ipoib/Makefile
@@ -5,7 +5,8 @@ ib_ipoib-y					:= ipoib_main.o \
 						   ipoib_multicast.o \
 						   ipoib_verbs.o \
 						   ipoib_vlan.o \
-						   ipoib_ethtool.o
+						   ipoib_ethtool.o \
+						   ipoib_netlink.o
 ib_ipoib-$(CONFIG_INFINIBAND_IPOIB_CM)		+= ipoib_cm.o
 ib_ipoib-$(CONFIG_INFINIBAND_IPOIB_DEBUG)	+= ipoib_fs.o
 
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index ca43901ed861..381f51b2ed61 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -104,6 +104,10 @@ enum {
 
 	MAX_SEND_CQE		  = 16,
 	IPOIB_CM_COPYBREAK	  = 256,
+
+	IPOIB_NON_CHILD		  = 0,
+	IPOIB_LEGACY_CHILD	  = 1,
+	IPOIB_RTNL_CHILD	  = 2,
 };
 
 #define	IPOIB_OP_RECV   (1ul << 31)
@@ -350,6 +354,7 @@ struct ipoib_dev_priv {
 	struct net_device *parent;
 	struct list_head child_intfs;
 	struct list_head list;
+	int    child_type;
 
 #ifdef CONFIG_INFINIBAND_IPOIB_CM
 	struct ipoib_cm_dev_priv cm;
@@ -509,6 +514,14 @@ void ipoib_event(struct ib_event_handler *handler,
 int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey);
 int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey);
 
+int __ipoib_vlan_add(struct ipoib_dev_priv *ppriv, struct ipoib_dev_priv *priv,
+		     u16 pkey, int child_type);
+
+int  __init ipoib_netlink_init(void);
+void __exit ipoib_netlink_fini(void);
+
+void ipoib_setup(struct net_device *dev);
+
 void ipoib_pkey_poll(struct work_struct *work);
 int ipoib_pkey_dev_delay_open(struct net_device *dev);
 void ipoib_drain_cq(struct net_device *dev);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 3e2085a3ee47..b3e97096c446 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -173,6 +173,11 @@ static int ipoib_stop(struct net_device *dev)
 	return 0;
 }
 
+static void ipoib_uninit(struct net_device *dev)
+{
+	ipoib_dev_cleanup(dev);
+}
+
 static netdev_features_t ipoib_fix_features(struct net_device *dev, netdev_features_t features)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
@@ -1262,6 +1267,9 @@ out:
 void ipoib_dev_cleanup(struct net_device *dev)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev), *cpriv, *tcpriv;
+	LIST_HEAD(head);
+
+	ASSERT_RTNL();
 
 	ipoib_delete_debug_files(dev);
 
@@ -1270,10 +1278,9 @@ void ipoib_dev_cleanup(struct net_device *dev)
 		/* Stop GC on child */
 		set_bit(IPOIB_STOP_NEIGH_GC, &cpriv->flags);
 		cancel_delayed_work(&cpriv->neigh_reap_task);
-		unregister_netdev(cpriv->dev);
-		ipoib_dev_cleanup(cpriv->dev);
-		free_netdev(cpriv->dev);
+		unregister_netdevice_queue(cpriv->dev, &head);
 	}
+	unregister_netdevice_many(&head);
 
 	ipoib_ib_dev_cleanup(dev);
 
@@ -1291,6 +1298,7 @@ static const struct header_ops ipoib_header_ops = {
 };
 
 static const struct net_device_ops ipoib_netdev_ops = {
+	.ndo_uninit		 = ipoib_uninit,
 	.ndo_open		 = ipoib_open,
 	.ndo_stop		 = ipoib_stop,
 	.ndo_change_mtu		 = ipoib_change_mtu,
@@ -1300,7 +1308,7 @@ static const struct net_device_ops ipoib_netdev_ops = {
 	.ndo_set_rx_mode	 = ipoib_set_mcast_list,
 };
 
-static void ipoib_setup(struct net_device *dev)
+void ipoib_setup(struct net_device *dev)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 
@@ -1662,7 +1670,6 @@ static void ipoib_remove_one(struct ib_device *device)
 		flush_workqueue(ipoib_workqueue);
 
 		unregister_netdev(priv->dev);
-		ipoib_dev_cleanup(priv->dev);
 		free_netdev(priv->dev);
 	}
 
@@ -1714,8 +1721,15 @@ static int __init ipoib_init_module(void)
 	if (ret)
 		goto err_sa;
 
+	ret = ipoib_netlink_init();
+	if (ret)
+		goto err_client;
+
 	return 0;
 
+err_client:
+	ib_unregister_client(&ipoib_client);
+
 err_sa:
 	ib_sa_unregister_client(&ipoib_sa_client);
 	destroy_workqueue(ipoib_workqueue);
@@ -1728,6 +1742,7 @@ err_fs:
 
 static void __exit ipoib_cleanup_module(void)
 {
+	ipoib_netlink_fini();
 	ib_unregister_client(&ipoib_client);
 	ib_sa_unregister_client(&ipoib_sa_client);
 	ipoib_unregister_debugfs();
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_netlink.c b/drivers/infiniband/ulp/ipoib/ipoib_netlink.c
new file mode 100644
index 000000000000..a7dc5ea8370e
--- /dev/null
+++ b/drivers/infiniband/ulp/ipoib/ipoib_netlink.c
@@ -0,0 +1,114 @@
+/*
+ * Copyright (c) 2012 Mellanox Technologies. -  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/netdevice.h>
+#include <linux/module.h>
+#include <net/rtnetlink.h>
+#include "ipoib.h"
+
+static const struct nla_policy ipoib_policy[IFLA_IPOIB_MAX + 1] = {
+	[IFLA_IPOIB_PKEY]	= { .type = NLA_U16 },
+};
+
+static int ipoib_new_child_link(struct net *src_net, struct net_device *dev,
+			       struct nlattr *tb[], struct nlattr *data[])
+{
+	struct net_device *pdev;
+	struct ipoib_dev_priv *ppriv;
+	u16 child_pkey;
+	int err;
+
+	if (!tb[IFLA_LINK])
+		return -EINVAL;
+
+	pdev = __dev_get_by_index(src_net, nla_get_u32(tb[IFLA_LINK]));
+	if (!pdev)
+		return -ENODEV;
+
+	ppriv = netdev_priv(pdev);
+
+	if (test_bit(IPOIB_FLAG_SUBINTERFACE, &ppriv->flags)) {
+		ipoib_warn(ppriv, "child creation disallowed for child devices\n");
+		return -EINVAL;
+	}
+
+	if (!data || !data[IFLA_IPOIB_PKEY]) {
+		ipoib_dbg(ppriv, "no pkey specified, using parent pkey\n");
+		child_pkey  = ppriv->pkey;
+	} else
+		child_pkey  = nla_get_u16(data[IFLA_IPOIB_PKEY]);
+
+	err = __ipoib_vlan_add(ppriv, netdev_priv(dev), child_pkey, IPOIB_RTNL_CHILD);
+
+	return err;
+}
+
+static void ipoib_unregister_child_dev(struct net_device *dev, struct list_head *head)
+{
+	struct ipoib_dev_priv *priv, *ppriv;
+
+	priv = netdev_priv(dev);
+	ppriv = netdev_priv(priv->parent);
+
+	mutex_lock(&ppriv->vlan_mutex);
+	unregister_netdevice_queue(dev, head);
+	list_del(&priv->list);
+	mutex_unlock(&ppriv->vlan_mutex);
+}
+
+static size_t ipoib_get_size(const struct net_device *dev)
+{
+	return nla_total_size(2);	/* IFLA_IPOIB_PKEY */
+}
+
+static struct rtnl_link_ops ipoib_link_ops __read_mostly = {
+	.kind		= "ipoib",
+	.maxtype	= IFLA_IPOIB_MAX,
+	.policy		= ipoib_policy,
+	.priv_size	= sizeof(struct ipoib_dev_priv),
+	.setup		= ipoib_setup,
+	.newlink	= ipoib_new_child_link,
+	.dellink	= ipoib_unregister_child_dev,
+	.get_size	= ipoib_get_size,
+};
+
+int __init ipoib_netlink_init(void)
+{
+	return rtnl_link_register(&ipoib_link_ops);
+}
+
+void __exit ipoib_netlink_fini(void)
+{
+	rtnl_link_unregister(&ipoib_link_ops);
+}
+
+MODULE_ALIAS_RTNL_LINK("ipoib");
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
index d7e9740c7248..238bbf9b2bea 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
@@ -49,47 +49,11 @@ static ssize_t show_parent(struct device *d, struct device_attribute *attr,
 }
 static DEVICE_ATTR(parent, S_IRUGO, show_parent, NULL);
 
-int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey)
+int __ipoib_vlan_add(struct ipoib_dev_priv *ppriv, struct ipoib_dev_priv *priv,
+		     u16 pkey, int type)
 {
-	struct ipoib_dev_priv *ppriv, *priv;
-	char intf_name[IFNAMSIZ];
 	int result;
 
-	if (!capable(CAP_NET_ADMIN))
-		return -EPERM;
-
-	ppriv = netdev_priv(pdev);
-
-	if (!rtnl_trylock())
-		return restart_syscall();
-	mutex_lock(&ppriv->vlan_mutex);
-
-	/*
-	 * First ensure this isn't a duplicate. We check the parent device and
-	 * then all of the child interfaces to make sure the Pkey doesn't match.
-	 */
-	if (ppriv->pkey == pkey) {
-		result = -ENOTUNIQ;
-		priv = NULL;
-		goto err;
-	}
-
-	list_for_each_entry(priv, &ppriv->child_intfs, list) {
-		if (priv->pkey == pkey) {
-			result = -ENOTUNIQ;
-			priv = NULL;
-			goto err;
-		}
-	}
-
-	snprintf(intf_name, sizeof intf_name, "%s.%04x",
-		 ppriv->dev->name, pkey);
-	priv = ipoib_intf_alloc(intf_name);
-	if (!priv) {
-		result = -ENOMEM;
-		goto err;
-	}
-
 	priv->max_ib_mtu = ppriv->max_ib_mtu;
 	/* MTU will be reset when mcast join happens */
 	priv->dev->mtu   = IPOIB_UD_MTU(priv->max_ib_mtu);
@@ -134,14 +98,13 @@ int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey)
 	if (device_create_file(&priv->dev->dev, &dev_attr_parent))
 		goto sysfs_failed;
 
+	priv->child_type = type;
 	list_add_tail(&priv->list, &ppriv->child_intfs);
 
-	mutex_unlock(&ppriv->vlan_mutex);
-	rtnl_unlock();
-
 	return 0;
 
 sysfs_failed:
+	result = -ENOMEM;
 	ipoib_delete_debug_files(priv->dev);
 	unregister_netdevice(priv->dev);
 
@@ -149,11 +112,60 @@ register_failed:
 	ipoib_dev_cleanup(priv->dev);
 
 err:
+	return result;
+}
+
+int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey)
+{
+	struct ipoib_dev_priv *ppriv, *priv;
+	char intf_name[IFNAMSIZ];
+	struct ipoib_dev_priv *tpriv;
+	int result;
+
+	if (!capable(CAP_NET_ADMIN))
+		return -EPERM;
+
+	ppriv = netdev_priv(pdev);
+
+	snprintf(intf_name, sizeof intf_name, "%s.%04x",
+		 ppriv->dev->name, pkey);
+	priv = ipoib_intf_alloc(intf_name);
+	if (!priv)
+		return -ENOMEM;
+
+	if (!rtnl_trylock())
+		return restart_syscall();
+
+	mutex_lock(&ppriv->vlan_mutex);
+
+	/*
+	 * First ensure this isn't a duplicate. We check the parent device and
+	 * then all of the legacy child interfaces to make sure the Pkey
+	 * doesn't match.
+	 */
+	if (ppriv->pkey == pkey) {
+		result = -ENOTUNIQ;
+		goto out;
+	}
+
+	list_for_each_entry(tpriv, &ppriv->child_intfs, list) {
+		if (tpriv->pkey == pkey &&
+		    tpriv->child_type == IPOIB_LEGACY_CHILD) {
+			result = -ENOTUNIQ;
+			goto out;
+		}
+	}
+
+	result = __ipoib_vlan_add(ppriv, priv, pkey, IPOIB_LEGACY_CHILD);
+
+out:
 	mutex_unlock(&ppriv->vlan_mutex);
-	rtnl_unlock();
-	if (priv)
+
+	if (result)
 		free_netdev(priv->dev);
 
+	rtnl_unlock();
+
 	return result;
 }
 
@@ -171,9 +183,9 @@ int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey)
 		return restart_syscall();
 	mutex_lock(&ppriv->vlan_mutex);
 	list_for_each_entry_safe(priv, tpriv, &ppriv->child_intfs, list) {
-		if (priv->pkey == pkey) {
+		if (priv->pkey == pkey &&
+		    priv->child_type == IPOIB_LEGACY_CHILD) {
 			unregister_netdevice(priv->dev);
-			ipoib_dev_cleanup(priv->dev);
 			list_del(&priv->list);
 			dev = priv->dev;
 			break;
diff --git a/include/linux/if_link.h b/include/linux/if_link.h
index ac173bd2ab65..24c0dd09af54 100644
--- a/include/linux/if_link.h
+++ b/include/linux/if_link.h
@@ -398,4 +398,15 @@ struct ifla_port_vsi {
 	__u8 pad[3];
 };
 
+
+/* IPoIB section */
+
+enum {
+	IFLA_IPOIB_UNSPEC,
+	IFLA_IPOIB_PKEY,
+	__IFLA_IPOIB_MAX
+};
+
+#define IFLA_IPOIB_MAX (__IFLA_IPOIB_MAX - 1)
+
 #endif /* _LINUX_IF_LINK_H */
-- 
cgit v1.2.3


From bb68b64724a4fd6b93d83b39aeffa4aadb2562fc Mon Sep 17 00:00:00 2001
From: Christoph Paasch <christoph.paasch@uclouvain.be>
Date: Tue, 18 Sep 2012 14:19:23 +0000
Subject: ipv4: Don't add TCP-code in inet_sock_destruct

Signed-off-by: Christoph Paasch <christoph.paasch@uclouvain.be>
Acked-by: H.K. Jerry Chu <hkchu@google.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/tcp.h | 4 ++++
 net/ipv4/af_inet.c  | 2 --
 net/ipv4/tcp.c      | 7 +++++++
 3 files changed, 11 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index ae46df590629..67c789ae719c 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -574,6 +574,8 @@ static inline bool fastopen_cookie_present(struct tcp_fastopen_cookie *foc)
 	return foc->len != -1;
 }
 
+extern void tcp_sock_destruct(struct sock *sk);
+
 static inline int fastopen_init_queue(struct sock *sk, int backlog)
 {
 	struct request_sock_queue *queue =
@@ -585,6 +587,8 @@ static inline int fastopen_init_queue(struct sock *sk, int backlog)
 		    sk->sk_allocation);
 		if (queue->fastopenq == NULL)
 			return -ENOMEM;
+
+		sk->sk_destruct = tcp_sock_destruct;
 		spin_lock_init(&queue->fastopenq->lock);
 	}
 	queue->fastopenq->max_qlen = backlog;
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 845372b025f6..766c59658563 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -149,8 +149,6 @@ void inet_sock_destruct(struct sock *sk)
 		pr_err("Attempt to release alive inet socket %p\n", sk);
 		return;
 	}
-	if (sk->sk_protocol == IPPROTO_TCP)
-		kfree(inet_csk(sk)->icsk_accept_queue.fastopenq);
 
 	WARN_ON(atomic_read(&sk->sk_rmem_alloc));
 	WARN_ON(atomic_read(&sk->sk_wmem_alloc));
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index df83d744e380..7b1e940393cf 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2325,6 +2325,13 @@ int tcp_disconnect(struct sock *sk, int flags)
 }
 EXPORT_SYMBOL(tcp_disconnect);
 
+void tcp_sock_destruct(struct sock *sk)
+{
+	inet_sock_destruct(sk);
+
+	kfree(inet_csk(sk)->icsk_accept_queue.fastopenq);
+}
+
 static inline bool tcp_can_repair_sock(const struct sock *sk)
 {
 	return capable(CAP_NET_ADMIN) &&
-- 
cgit v1.2.3


From abb17e6c0c7b27693201dc85f75dbb184279fd10 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Fri, 21 Sep 2012 09:35:38 +0000
Subject: netlink: use <linux/export.h> instead of <linux/module.h>

Since (9f00d97 netlink: hide struct module parameter in netlink_kernel_create),
linux/netlink.h includes linux/module.h because of the use of THIS_MODULE.

Use linux/export.h instead, as suggested by Stephen Rothwell, which is
significantly smaller and defines THIS_MODULES.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netlink.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/netlink.h b/include/linux/netlink.h
index 73ade5fbc856..b3dc992fb25b 100644
--- a/include/linux/netlink.h
+++ b/include/linux/netlink.h
@@ -153,7 +153,7 @@ struct nlattr {
 
 #include <linux/capability.h>
 #include <linux/skbuff.h>
-#include <linux/module.h>
+#include <linux/export.h>
 #include <net/scm.h>
 
 struct net;
-- 
cgit v1.2.3


From 1ef761582c074448bae5be97abde5934667e7710 Mon Sep 17 00:00:00 2001
From: Richard Cochran <richardcochran@gmail.com>
Date: Sat, 22 Sep 2012 07:02:03 +0000
Subject: ptp: link the phc device to its parent device

PTP Hardware Clock devices appear as class devices in sysfs. This patch
changes the registration API to use the parent device, clarifying the
clock's relationship to the underlying device.

Signed-off-by: Richard Cochran <richardcochran@gmail.com>
Acked-by: Ben Hutchings <bhutchings@solarflare.com>
Acked-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/gianfar_ptp.c | 2 +-
 drivers/net/ethernet/intel/igb/igb_ptp.c     | 3 ++-
 drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c | 3 ++-
 drivers/net/ethernet/sfc/ptp.c               | 3 ++-
 drivers/net/phy/dp83640.c                    | 2 +-
 drivers/ptp/ptp_clock.c                      | 5 +++--
 drivers/ptp/ptp_ixp46x.c                     | 2 +-
 drivers/ptp/ptp_pch.c                        | 2 +-
 include/linux/ptp_clock_kernel.h             | 7 +++++--
 9 files changed, 18 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/drivers/net/ethernet/freescale/gianfar_ptp.c b/drivers/net/ethernet/freescale/gianfar_ptp.c
index c08e5d40fecb..18762a3ccce4 100644
--- a/drivers/net/ethernet/freescale/gianfar_ptp.c
+++ b/drivers/net/ethernet/freescale/gianfar_ptp.c
@@ -510,7 +510,7 @@ static int gianfar_ptp_probe(struct platform_device *dev)
 
 	spin_unlock_irqrestore(&etsects->lock, flags);
 
-	etsects->clock = ptp_clock_register(&etsects->caps);
+	etsects->clock = ptp_clock_register(&etsects->caps, &dev->dev);
 	if (IS_ERR(etsects->clock)) {
 		err = PTR_ERR(etsects->clock);
 		goto no_clock;
diff --git a/drivers/net/ethernet/intel/igb/igb_ptp.c b/drivers/net/ethernet/intel/igb/igb_ptp.c
index e13ba1d5369f..ee21445157a3 100644
--- a/drivers/net/ethernet/intel/igb/igb_ptp.c
+++ b/drivers/net/ethernet/intel/igb/igb_ptp.c
@@ -752,7 +752,8 @@ void igb_ptp_init(struct igb_adapter *adapter)
 		wr32(E1000_IMS, E1000_IMS_TS);
 	}
 
-	adapter->ptp_clock = ptp_clock_register(&adapter->ptp_caps);
+	adapter->ptp_clock = ptp_clock_register(&adapter->ptp_caps,
+						&adapter->pdev->dev);
 	if (IS_ERR(adapter->ptp_clock)) {
 		adapter->ptp_clock = NULL;
 		dev_err(&adapter->pdev->dev, "ptp_clock_register failed\n");
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c
index 3456d5617143..39881cb17a4b 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c
@@ -960,7 +960,8 @@ void ixgbe_ptp_init(struct ixgbe_adapter *adapter)
 	/* (Re)start the overflow check */
 	adapter->flags2 |= IXGBE_FLAG2_OVERFLOW_CHECK_ENABLED;
 
-	adapter->ptp_clock = ptp_clock_register(&adapter->ptp_caps);
+	adapter->ptp_clock = ptp_clock_register(&adapter->ptp_caps,
+						&adapter->pdev->dev);
 	if (IS_ERR(adapter->ptp_clock)) {
 		adapter->ptp_clock = NULL;
 		e_dev_err("ptp_clock_register failed\n");
diff --git a/drivers/net/ethernet/sfc/ptp.c b/drivers/net/ethernet/sfc/ptp.c
index 2b07a4eae07e..5b3dd028ce85 100644
--- a/drivers/net/ethernet/sfc/ptp.c
+++ b/drivers/net/ethernet/sfc/ptp.c
@@ -931,7 +931,8 @@ static int efx_ptp_probe_channel(struct efx_channel *channel)
 	ptp->phc_clock_info.settime = efx_phc_settime;
 	ptp->phc_clock_info.enable = efx_phc_enable;
 
-	ptp->phc_clock = ptp_clock_register(&ptp->phc_clock_info);
+	ptp->phc_clock = ptp_clock_register(&ptp->phc_clock_info,
+					    &efx->pci_dev->dev);
 	if (!ptp->phc_clock)
 		goto fail3;
 
diff --git a/drivers/net/phy/dp83640.c b/drivers/net/phy/dp83640.c
index b0da0226661f..24e05c43bff8 100644
--- a/drivers/net/phy/dp83640.c
+++ b/drivers/net/phy/dp83640.c
@@ -980,7 +980,7 @@ static int dp83640_probe(struct phy_device *phydev)
 
 	if (choose_this_phy(clock, phydev)) {
 		clock->chosen = dp83640;
-		clock->ptp_clock = ptp_clock_register(&clock->caps);
+		clock->ptp_clock = ptp_clock_register(&clock->caps, &phydev->dev);
 		if (IS_ERR(clock->ptp_clock)) {
 			err = PTR_ERR(clock->ptp_clock);
 			goto no_register;
diff --git a/drivers/ptp/ptp_clock.c b/drivers/ptp/ptp_clock.c
index c470ddf1a6fa..79f4bce061bd 100644
--- a/drivers/ptp/ptp_clock.c
+++ b/drivers/ptp/ptp_clock.c
@@ -182,7 +182,8 @@ static void delete_ptp_clock(struct posix_clock *pc)
 
 /* public interface */
 
-struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info)
+struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info,
+				     struct device *parent)
 {
 	struct ptp_clock *ptp;
 	int err = 0, index, major = MAJOR(ptp_devt);
@@ -215,7 +216,7 @@ struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info)
 	init_waitqueue_head(&ptp->tsev_wq);
 
 	/* Create a new device in our class. */
-	ptp->dev = device_create(ptp_class, NULL, ptp->devid, ptp,
+	ptp->dev = device_create(ptp_class, parent, ptp->devid, ptp,
 				 "ptp%d", ptp->index);
 	if (IS_ERR(ptp->dev))
 		goto no_device;
diff --git a/drivers/ptp/ptp_ixp46x.c b/drivers/ptp/ptp_ixp46x.c
index e03c40692b00..d49b85164fd2 100644
--- a/drivers/ptp/ptp_ixp46x.c
+++ b/drivers/ptp/ptp_ixp46x.c
@@ -298,7 +298,7 @@ static int __init ptp_ixp_init(void)
 
 	ixp_clock.caps = ptp_ixp_caps;
 
-	ixp_clock.ptp_clock = ptp_clock_register(&ixp_clock.caps);
+	ixp_clock.ptp_clock = ptp_clock_register(&ixp_clock.caps, NULL);
 
 	if (IS_ERR(ixp_clock.ptp_clock))
 		return PTR_ERR(ixp_clock.ptp_clock);
diff --git a/drivers/ptp/ptp_pch.c b/drivers/ptp/ptp_pch.c
index 3a9c17eced10..e624e4dd2abb 100644
--- a/drivers/ptp/ptp_pch.c
+++ b/drivers/ptp/ptp_pch.c
@@ -627,7 +627,7 @@ pch_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	}
 
 	chip->caps = ptp_pch_caps;
-	chip->ptp_clock = ptp_clock_register(&chip->caps);
+	chip->ptp_clock = ptp_clock_register(&chip->caps, &pdev->dev);
 
 	if (IS_ERR(chip->ptp_clock))
 		return PTR_ERR(chip->ptp_clock);
diff --git a/include/linux/ptp_clock_kernel.h b/include/linux/ptp_clock_kernel.h
index a644b29f1161..56c71b27112e 100644
--- a/include/linux/ptp_clock_kernel.h
+++ b/include/linux/ptp_clock_kernel.h
@@ -21,6 +21,7 @@
 #ifndef _PTP_CLOCK_KERNEL_H_
 #define _PTP_CLOCK_KERNEL_H_
 
+#include <linux/device.h>
 #include <linux/pps_kernel.h>
 #include <linux/ptp_clock.h>
 
@@ -93,10 +94,12 @@ struct ptp_clock;
 /**
  * ptp_clock_register() - register a PTP hardware clock driver
  *
- * @info:  Structure describing the new clock.
+ * @info:   Structure describing the new clock.
+ * @parent: Pointer to the parent device of the new clock.
  */
 
-extern struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info);
+extern struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info,
+					    struct device *parent);
 
 /**
  * ptp_clock_unregister() - unregister a PTP hardware clock driver
-- 
cgit v1.2.3


From de46584675fad02b7f8255f31be0ea1be5cd185b Mon Sep 17 00:00:00 2001
From: Richard Cochran <richardcochran@gmail.com>
Date: Sat, 22 Sep 2012 07:02:04 +0000
Subject: ptp: clarify the clock_name sysfs attribute

There has been some confusion among PHC driver authors about the
intended purpose of the clock_name attribute. This patch expands the
documation in order to clarify how the clock_name field should be
understood.

Signed-off-by: Richard Cochran <richardcochran@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/ABI/testing/sysfs-ptp | 6 +++++-
 include/linux/ptp_clock_kernel.h    | 4 +++-
 2 files changed, 8 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/Documentation/ABI/testing/sysfs-ptp b/Documentation/ABI/testing/sysfs-ptp
index d40d2b550502..05aeedf17794 100644
--- a/Documentation/ABI/testing/sysfs-ptp
+++ b/Documentation/ABI/testing/sysfs-ptp
@@ -19,7 +19,11 @@ Date:		September 2010
 Contact:	Richard Cochran <richardcochran@gmail.com>
 Description:
 		This file contains the name of the PTP hardware clock
-		as a human readable string.
+		as a human readable string. The purpose of this
+		attribute is to provide the user with a "friendly
+		name" and to help distinguish PHY based devices from
+		MAC based ones. The string does not necessarily have
+		to be any kind of unique id.
 
 What:		/sys/class/ptp/ptpN/max_adjustment
 Date:		September 2010
diff --git a/include/linux/ptp_clock_kernel.h b/include/linux/ptp_clock_kernel.h
index 56c71b27112e..f2dc6d8fc680 100644
--- a/include/linux/ptp_clock_kernel.h
+++ b/include/linux/ptp_clock_kernel.h
@@ -42,7 +42,9 @@ struct ptp_clock_request {
  * struct ptp_clock_info - decribes a PTP hardware clock
  *
  * @owner:     The clock driver should set to THIS_MODULE.
- * @name:      A short name to identify the clock.
+ * @name:      A short "friendly name" to identify the clock and to
+ *             help distinguish PHY based devices from MAC based ones.
+ *             The string is not meant to be a unique id.
  * @max_adj:   The maximum possible frequency adjustment, in parts per billon.
  * @n_alarm:   The number of programmable alarms.
  * @n_ext_ts:  The number of external time stamp channels.
-- 
cgit v1.2.3


From 623df484a777f3c00c1ea3d6a7565b8d8ac688a1 Mon Sep 17 00:00:00 2001
From: Neal Cardwell <ncardwell@google.com>
Date: Sat, 22 Sep 2012 04:18:54 +0000
Subject: tcp: extract code to compute SYNACK RTT

In preparation for adding another spot where we compute the SYNACK
RTT, extract this code so that it can be shared.

Signed-off-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h   | 9 +++++++++
 net/ipv4/tcp_ipv4.c | 4 +---
 net/ipv6/tcp_ipv6.c | 4 +---
 3 files changed, 11 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index a8cb00c0c6d9..a718d0e3d8e7 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1137,6 +1137,15 @@ static inline void tcp_openreq_init(struct request_sock *req,
 	ireq->loc_port = tcp_hdr(skb)->dest;
 }
 
+/* Compute time elapsed between SYNACK and the ACK completing 3WHS */
+static inline void tcp_synack_rtt_meas(struct sock *sk,
+				       struct request_sock *req)
+{
+	if (tcp_rsk(req)->snt_synack)
+		tcp_valid_rtt_meas(sk,
+		    tcp_time_stamp - tcp_rsk(req)->snt_synack);
+}
+
 extern void tcp_enter_memory_pressure(struct sock *sk);
 
 static inline int keepalive_intvl_when(const struct tcp_sock *tp)
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index e64abed249cc..1e66f7fb4fe6 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1733,9 +1733,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 		newtp->advmss = tcp_sk(sk)->rx_opt.user_mss;
 
 	tcp_initialize_rcv_mss(newsk);
-	if (tcp_rsk(req)->snt_synack)
-		tcp_valid_rtt_meas(newsk,
-		    tcp_time_stamp - tcp_rsk(req)->snt_synack);
+	tcp_synack_rtt_meas(newsk, req);
 	newtp->total_retrans = req->retrans;
 
 #ifdef CONFIG_TCP_MD5SIG
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index f3bfb8bbfdec..cfeeeb7fcf54 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1348,9 +1348,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 		newtp->advmss = tcp_sk(sk)->rx_opt.user_mss;
 
 	tcp_initialize_rcv_mss(newsk);
-	if (tcp_rsk(req)->snt_synack)
-		tcp_valid_rtt_meas(newsk,
-		    tcp_time_stamp - tcp_rsk(req)->snt_synack);
+	tcp_synack_rtt_meas(newsk, req);
 	newtp->total_retrans = req->retrans;
 
 	newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
-- 
cgit v1.2.3


From 016818d076871c4ee34db1e8d74dc17ac1de626a Mon Sep 17 00:00:00 2001
From: Neal Cardwell <ncardwell@google.com>
Date: Sat, 22 Sep 2012 04:18:55 +0000
Subject: tcp: TCP Fast Open Server - take SYNACK RTT after completing 3WHS

When taking SYNACK RTT samples for servers using TCP Fast Open, fix
the code to ensure that we only call tcp_valid_rtt_meas() after we
receive the ACK that completes the 3-way handshake.

Previously we were always taking an RTT sample in
tcp_v4_syn_recv_sock(). However, for TCP Fast Open connections
tcp_v4_conn_req_fastopen() calls tcp_v4_syn_recv_sock() at the time we
receive the SYN. So for TFO we must wait until tcp_rcv_state_process()
to take the RTT sample.

To fix this, we wait until after TFO calls tcp_v4_syn_recv_sock()
before we set the snt_synack timestamp, since tcp_synack_rtt_meas()
already ensures that we only take a SYNACK RTT sample if snt_synack is
non-zero. To be careful, we only take a snt_synack timestamp when
a SYNACK transmit or retransmit succeeds.

Signed-off-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h    |  1 +
 net/ipv4/tcp_input.c |  1 +
 net/ipv4/tcp_ipv4.c  | 12 +++++++++---
 net/ipv6/tcp_ipv6.c  |  2 +-
 4 files changed, 12 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index a718d0e3d8e7..6feeccd83dd7 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1125,6 +1125,7 @@ static inline void tcp_openreq_init(struct request_sock *req,
 	req->cookie_ts = 0;
 	tcp_rsk(req)->rcv_isn = TCP_SKB_CB(skb)->seq;
 	tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
+	tcp_rsk(req)->snt_synack = 0;
 	req->mss = rx_opt->mss_clamp;
 	req->ts_recent = rx_opt->saw_tstamp ? rx_opt->rcv_tsval : 0;
 	ireq->tstamp_ok = rx_opt->tstamp_ok;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index e2bec815ff23..bb326684495b 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -5983,6 +5983,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 				 * need req so release it.
 				 */
 				if (req) {
+					tcp_synack_rtt_meas(sk, req);
 					reqsk_fastopen_remove(sk, req, false);
 				} else {
 					/* Make sure socket is routed, for
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 1e66f7fb4fe6..0a7e020f16b5 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -868,6 +868,8 @@ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
 					    ireq->rmt_addr,
 					    ireq->opt);
 		err = net_xmit_eval(err);
+		if (!tcp_rsk(req)->snt_synack && !err)
+			tcp_rsk(req)->snt_synack = tcp_time_stamp;
 	}
 
 	return err;
@@ -1382,6 +1384,7 @@ static int tcp_v4_conn_req_fastopen(struct sock *sk,
 	struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
 	const struct inet_request_sock *ireq = inet_rsk(req);
 	struct sock *child;
+	int err;
 
 	req->retrans = 0;
 	req->sk = NULL;
@@ -1393,8 +1396,11 @@ static int tcp_v4_conn_req_fastopen(struct sock *sk,
 		kfree_skb(skb_synack);
 		return -1;
 	}
-	ip_build_and_send_pkt(skb_synack, sk, ireq->loc_addr,
-			ireq->rmt_addr, ireq->opt);
+	err = ip_build_and_send_pkt(skb_synack, sk, ireq->loc_addr,
+				    ireq->rmt_addr, ireq->opt);
+	err = net_xmit_eval(err);
+	if (!err)
+		tcp_rsk(req)->snt_synack = tcp_time_stamp;
 	/* XXX (TFO) - is it ok to ignore error and continue? */
 
 	spin_lock(&queue->fastopenq->lock);
@@ -1612,7 +1618,6 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 		isn = tcp_v4_init_sequence(skb);
 	}
 	tcp_rsk(req)->snt_isn = isn;
-	tcp_rsk(req)->snt_synack = tcp_time_stamp;
 
 	if (dst == NULL) {
 		dst = inet_csk_route_req(sk, &fl4, req);
@@ -1650,6 +1655,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 		if (err || want_cookie)
 			goto drop_and_free;
 
+		tcp_rsk(req)->snt_synack = tcp_time_stamp;
 		tcp_rsk(req)->listener = NULL;
 		/* Add the request_sock to the SYN table */
 		inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index cfeeeb7fcf54..d6212d6bc8d8 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1169,7 +1169,6 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 	}
 have_isn:
 	tcp_rsk(req)->snt_isn = isn;
-	tcp_rsk(req)->snt_synack = tcp_time_stamp;
 
 	if (security_inet_conn_request(sk, skb, req))
 		goto drop_and_release;
@@ -1180,6 +1179,7 @@ have_isn:
 	    want_cookie)
 		goto drop_and_free;
 
+	tcp_rsk(req)->snt_synack = tcp_time_stamp;
 	tcp_rsk(req)->listener = NULL;
 	inet6_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
 	return 0;
-- 
cgit v1.2.3


From 85f8c13e4122eb04fae9d3801eabaa45e3006a88 Mon Sep 17 00:00:00 2001
From: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Date: Sat, 22 Sep 2012 22:42:08 +0200
Subject: netfilter: ipset: Rewrite cidr book keeping to handle /0

Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
---
 include/linux/netfilter/ipset/ip_set_ahash.h | 104 ++++++++++++++-------------
 1 file changed, 55 insertions(+), 49 deletions(-)

(limited to 'include')

diff --git a/include/linux/netfilter/ipset/ip_set_ahash.h b/include/linux/netfilter/ipset/ip_set_ahash.h
index b114d35aea5e..495acdf7251f 100644
--- a/include/linux/netfilter/ipset/ip_set_ahash.h
+++ b/include/linux/netfilter/ipset/ip_set_ahash.h
@@ -137,50 +137,59 @@ htable_bits(u32 hashsize)
 #endif
 
 #define SET_HOST_MASK(family)	(family == AF_INET ? 32 : 128)
+#ifdef IP_SET_HASH_WITH_MULTI
+#define NETS_LENGTH(family)	(SET_HOST_MASK(family) + 1)
+#else
+#define NETS_LENGTH(family)	SET_HOST_MASK(family)
+#endif
 
 /* Network cidr size book keeping when the hash stores different
  * sized networks */
 static void
-add_cidr(struct ip_set_hash *h, u8 cidr, u8 host_mask)
+add_cidr(struct ip_set_hash *h, u8 cidr, u8 nets_length)
 {
-	u8 i;
-
-	++h->nets[cidr-1].nets;
-
-	pr_debug("add_cidr added %u: %u\n", cidr, h->nets[cidr-1].nets);
+	int i, j;
 
-	if (h->nets[cidr-1].nets > 1)
-		return;
-
-	/* New cidr size */
-	for (i = 0; i < host_mask && h->nets[i].cidr; i++) {
-		/* Add in increasing prefix order, so larger cidr first */
-		if (h->nets[i].cidr < cidr)
-			swap(h->nets[i].cidr, cidr);
+	/* Add in increasing prefix order, so larger cidr first */
+	for (i = 0, j = -1; i < nets_length && h->nets[i].nets; i++) {
+		if (j != -1)
+			continue;
+		else if (h->nets[i].cidr < cidr)
+			j = i;
+		else if (h->nets[i].cidr == cidr) {
+			h->nets[i].nets++;
+			return;
+		}
+	}
+	if (j != -1) {
+		for (; i > j; i--) {
+			h->nets[i].cidr = h->nets[i - 1].cidr;
+			h->nets[i].nets = h->nets[i - 1].nets;
+		}
 	}
-	if (i < host_mask)
-		h->nets[i].cidr = cidr;
+	h->nets[i].cidr = cidr;
+	h->nets[i].nets = 1;
 }
 
 static void
-del_cidr(struct ip_set_hash *h, u8 cidr, u8 host_mask)
+del_cidr(struct ip_set_hash *h, u8 cidr, u8 nets_length)
 {
-	u8 i;
-
-	--h->nets[cidr-1].nets;
+	u8 i, j;
 
-	pr_debug("del_cidr deleted %u: %u\n", cidr, h->nets[cidr-1].nets);
+	for (i = 0; i < nets_length - 1 && h->nets[i].cidr != cidr; i++)
+		;
+	h->nets[i].nets--;
 
-	if (h->nets[cidr-1].nets != 0)
+	if (h->nets[i].nets != 0)
 		return;
 
-	/* All entries with this cidr size deleted, so cleanup h->cidr[] */
-	for (i = 0; i < host_mask - 1 && h->nets[i].cidr; i++) {
-		if (h->nets[i].cidr == cidr)
-			h->nets[i].cidr = cidr = h->nets[i+1].cidr;
+	for (j = i; j < nets_length - 1 && h->nets[j].nets; j++) {
+		h->nets[j].cidr = h->nets[j + 1].cidr;
+		h->nets[j].nets = h->nets[j + 1].nets;
 	}
-	h->nets[i - 1].cidr = 0;
 }
+#else
+#define NETS_LENGTH(family)		0
 #endif
 
 /* Destroy the hashtable part of the set */
@@ -202,14 +211,14 @@ ahash_destroy(struct htable *t)
 
 /* Calculate the actual memory size of the set data */
 static size_t
-ahash_memsize(const struct ip_set_hash *h, size_t dsize, u8 host_mask)
+ahash_memsize(const struct ip_set_hash *h, size_t dsize, u8 nets_length)
 {
 	u32 i;
 	struct htable *t = h->table;
 	size_t memsize = sizeof(*h)
 			 + sizeof(*t)
 #ifdef IP_SET_HASH_WITH_NETS
-			 + sizeof(struct ip_set_hash_nets) * host_mask
+			 + sizeof(struct ip_set_hash_nets) * nets_length
 #endif
 			 + jhash_size(t->htable_bits) * sizeof(struct hbucket);
 
@@ -238,7 +247,7 @@ ip_set_hash_flush(struct ip_set *set)
 	}
 #ifdef IP_SET_HASH_WITH_NETS
 	memset(h->nets, 0, sizeof(struct ip_set_hash_nets)
-			   * SET_HOST_MASK(set->family));
+			   * NETS_LENGTH(set->family));
 #endif
 	h->elements = 0;
 }
@@ -271,9 +280,6 @@ ip_set_hash_destroy(struct ip_set *set)
 (jhash2((u32 *)(data), HKEY_DATALEN/sizeof(u32), initval)	\
 	& jhash_mask(htable_bits))
 
-#define CONCAT(a, b, c)		a##b##c
-#define TOKEN(a, b, c)		CONCAT(a, b, c)
-
 /* Type/family dependent function prototypes */
 
 #define type_pf_data_equal	TOKEN(TYPE, PF, _data_equal)
@@ -478,7 +484,7 @@ type_pf_add(struct ip_set *set, void *value, u32 timeout, u32 flags)
 	}
 
 #ifdef IP_SET_HASH_WITH_NETS
-	add_cidr(h, CIDR(d->cidr), HOST_MASK);
+	add_cidr(h, CIDR(d->cidr), NETS_LENGTH(set->family));
 #endif
 	h->elements++;
 out:
@@ -513,7 +519,7 @@ type_pf_del(struct ip_set *set, void *value, u32 timeout, u32 flags)
 		n->pos--;
 		h->elements--;
 #ifdef IP_SET_HASH_WITH_NETS
-		del_cidr(h, CIDR(d->cidr), HOST_MASK);
+		del_cidr(h, CIDR(d->cidr), NETS_LENGTH(set->family));
 #endif
 		if (n->pos + AHASH_INIT_SIZE < n->size) {
 			void *tmp = kzalloc((n->size - AHASH_INIT_SIZE)
@@ -546,10 +552,10 @@ type_pf_test_cidrs(struct ip_set *set, struct type_pf_elem *d, u32 timeout)
 	const struct type_pf_elem *data;
 	int i, j = 0;
 	u32 key, multi = 0;
-	u8 host_mask = SET_HOST_MASK(set->family);
+	u8 nets_length = NETS_LENGTH(set->family);
 
 	pr_debug("test by nets\n");
-	for (; j < host_mask && h->nets[j].cidr && !multi; j++) {
+	for (; j < nets_length && h->nets[j].nets && !multi; j++) {
 		type_pf_data_netmask(d, h->nets[j].cidr);
 		key = HKEY(d, h->initval, t->htable_bits);
 		n = hbucket(t, key);
@@ -604,7 +610,7 @@ type_pf_head(struct ip_set *set, struct sk_buff *skb)
 	memsize = ahash_memsize(h, with_timeout(h->timeout)
 					? sizeof(struct type_pf_telem)
 					: sizeof(struct type_pf_elem),
-				set->family == AF_INET ? 32 : 128);
+				NETS_LENGTH(set->family));
 	read_unlock_bh(&set->lock);
 
 	nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
@@ -783,7 +789,7 @@ type_pf_elem_tadd(struct hbucket *n, const struct type_pf_elem *value,
 
 /* Delete expired elements from the hashtable */
 static void
-type_pf_expire(struct ip_set_hash *h)
+type_pf_expire(struct ip_set_hash *h, u8 nets_length)
 {
 	struct htable *t = h->table;
 	struct hbucket *n;
@@ -798,7 +804,7 @@ type_pf_expire(struct ip_set_hash *h)
 			if (type_pf_data_expired(data)) {
 				pr_debug("expired %u/%u\n", i, j);
 #ifdef IP_SET_HASH_WITH_NETS
-				del_cidr(h, CIDR(data->cidr), HOST_MASK);
+				del_cidr(h, CIDR(data->cidr), nets_length);
 #endif
 				if (j != n->pos - 1)
 					/* Not last one */
@@ -839,7 +845,7 @@ type_pf_tresize(struct ip_set *set, bool retried)
 	if (!retried) {
 		i = h->elements;
 		write_lock_bh(&set->lock);
-		type_pf_expire(set->data);
+		type_pf_expire(set->data, NETS_LENGTH(set->family));
 		write_unlock_bh(&set->lock);
 		if (h->elements <  i)
 			return 0;
@@ -904,7 +910,7 @@ type_pf_tadd(struct ip_set *set, void *value, u32 timeout, u32 flags)
 
 	if (h->elements >= h->maxelem)
 		/* FIXME: when set is full, we slow down here */
-		type_pf_expire(h);
+		type_pf_expire(h, NETS_LENGTH(set->family));
 	if (h->elements >= h->maxelem) {
 		if (net_ratelimit())
 			pr_warning("Set %s is full, maxelem %u reached\n",
@@ -933,8 +939,8 @@ type_pf_tadd(struct ip_set *set, void *value, u32 timeout, u32 flags)
 	if (j != AHASH_MAX(h) + 1) {
 		data = ahash_tdata(n, j);
 #ifdef IP_SET_HASH_WITH_NETS
-		del_cidr(h, CIDR(data->cidr), HOST_MASK);
-		add_cidr(h, CIDR(d->cidr), HOST_MASK);
+		del_cidr(h, CIDR(data->cidr), NETS_LENGTH(set->family));
+		add_cidr(h, CIDR(d->cidr), NETS_LENGTH(set->family));
 #endif
 		type_pf_data_copy(data, d);
 		type_pf_data_timeout_set(data, timeout);
@@ -952,7 +958,7 @@ type_pf_tadd(struct ip_set *set, void *value, u32 timeout, u32 flags)
 	}
 
 #ifdef IP_SET_HASH_WITH_NETS
-	add_cidr(h, CIDR(d->cidr), HOST_MASK);
+	add_cidr(h, CIDR(d->cidr), NETS_LENGTH(set->family));
 #endif
 	h->elements++;
 out:
@@ -986,7 +992,7 @@ type_pf_tdel(struct ip_set *set, void *value, u32 timeout, u32 flags)
 		n->pos--;
 		h->elements--;
 #ifdef IP_SET_HASH_WITH_NETS
-		del_cidr(h, CIDR(d->cidr), HOST_MASK);
+		del_cidr(h, CIDR(d->cidr), NETS_LENGTH(set->family));
 #endif
 		if (n->pos + AHASH_INIT_SIZE < n->size) {
 			void *tmp = kzalloc((n->size - AHASH_INIT_SIZE)
@@ -1016,9 +1022,9 @@ type_pf_ttest_cidrs(struct ip_set *set, struct type_pf_elem *d, u32 timeout)
 	struct hbucket *n;
 	int i, j = 0;
 	u32 key, multi = 0;
-	u8 host_mask = SET_HOST_MASK(set->family);
+	u8 nets_length = NETS_LENGTH(set->family);
 
-	for (; j < host_mask && h->nets[j].cidr && !multi; j++) {
+	for (; j < nets_length && h->nets[j].nets && !multi; j++) {
 		type_pf_data_netmask(d, h->nets[j].cidr);
 		key = HKEY(d, h->initval, t->htable_bits);
 		n = hbucket(t, key);
@@ -1147,7 +1153,7 @@ type_pf_gc(unsigned long ul_set)
 
 	pr_debug("called\n");
 	write_lock_bh(&set->lock);
-	type_pf_expire(h);
+	type_pf_expire(h, NETS_LENGTH(set->family));
 	write_unlock_bh(&set->lock);
 
 	h->gc.expires = jiffies + IPSET_GC_PERIOD(h->timeout) * HZ;
-- 
cgit v1.2.3


From 10111a6ef373c377e87730749a0f68210c3fd062 Mon Sep 17 00:00:00 2001
From: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Date: Fri, 21 Sep 2012 21:59:32 +0200
Subject: netfilter: ipset: Include supported revisions in module description

Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
---
 include/linux/netfilter/ipset/ip_set.h      |  6 ++++++
 net/netfilter/ipset/ip_set_bitmap_ip.c      |  9 ++++++---
 net/netfilter/ipset/ip_set_bitmap_ipmac.c   |  9 ++++++---
 net/netfilter/ipset/ip_set_bitmap_port.c    |  9 ++++++---
 net/netfilter/ipset/ip_set_hash_ip.c        |  9 ++++++---
 net/netfilter/ipset/ip_set_hash_ipport.c    |  9 ++++++---
 net/netfilter/ipset/ip_set_hash_ipportip.c  |  9 ++++++---
 net/netfilter/ipset/ip_set_hash_ipportnet.c | 13 ++++++++-----
 net/netfilter/ipset/ip_set_hash_net.c       | 11 +++++++----
 net/netfilter/ipset/ip_set_hash_netiface.c  | 11 +++++++----
 net/netfilter/ipset/ip_set_hash_netport.c   | 13 ++++++++-----
 net/netfilter/ipset/ip_set_list_set.c       |  9 ++++++---
 12 files changed, 78 insertions(+), 39 deletions(-)

(limited to 'include')

diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h
index 2edc64cab739..1d954c6c4e08 100644
--- a/include/linux/netfilter/ipset/ip_set.h
+++ b/include/linux/netfilter/ipset/ip_set.h
@@ -206,9 +206,15 @@ enum ip_set_kopt {
 #include <linux/netlink.h>
 #include <linux/netfilter.h>
 #include <linux/netfilter/x_tables.h>
+#include <linux/stringify.h>
 #include <linux/vmalloc.h>
 #include <net/netlink.h>
 
+#define _IP_SET_MODULE_DESC(a, b, c)		\
+	MODULE_DESCRIPTION(a " type of IP sets, revisions " b "-" c)
+#define IP_SET_MODULE_DESC(a, b, c)		\
+	_IP_SET_MODULE_DESC(a, __stringify(b), __stringify(c))
+
 /* Set features */
 enum ip_set_feature {
 	IPSET_TYPE_IP_FLAG = 0,
diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c
index 02184b5ef9e1..4a92fd47bd4c 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ip.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ip.c
@@ -27,9 +27,12 @@
 #define IP_SET_BITMAP_TIMEOUT
 #include <linux/netfilter/ipset/ip_set_timeout.h>
 
+#define REVISION_MIN	0
+#define REVISION_MAX	0
+
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
-MODULE_DESCRIPTION("bitmap:ip type of IP sets");
+IP_SET_MODULE_DESC("bitmap:ip", REVISION_MIN, REVISION_MAX);
 MODULE_ALIAS("ip_set_bitmap:ip");
 
 /* Type structure */
@@ -556,8 +559,8 @@ static struct ip_set_type bitmap_ip_type __read_mostly = {
 	.features	= IPSET_TYPE_IP,
 	.dimension	= IPSET_DIM_ONE,
 	.family		= NFPROTO_IPV4,
-	.revision_min	= 0,
-	.revision_max	= 0,
+	.revision_min	= REVISION_MIN,
+	.revision_max	= REVISION_MAX,
 	.create		= bitmap_ip_create,
 	.create_policy	= {
 		[IPSET_ATTR_IP]		= { .type = NLA_NESTED },
diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
index 6819d3cff919..645c9d13c194 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
@@ -26,9 +26,12 @@
 #include <linux/netfilter/ipset/ip_set_timeout.h>
 #include <linux/netfilter/ipset/ip_set_bitmap.h>
 
+#define REVISION_MIN	0
+#define REVISION_MAX	0
+
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
-MODULE_DESCRIPTION("bitmap:ip,mac type of IP sets");
+IP_SET_MODULE_DESC("bitmap:ip,mac", REVISION_MIN, REVISION_MAX);
 MODULE_ALIAS("ip_set_bitmap:ip,mac");
 
 enum {
@@ -630,8 +633,8 @@ static struct ip_set_type bitmap_ipmac_type = {
 	.features	= IPSET_TYPE_IP | IPSET_TYPE_MAC,
 	.dimension	= IPSET_DIM_TWO,
 	.family		= NFPROTO_IPV4,
-	.revision_min	= 0,
-	.revision_max	= 0,
+	.revision_min	= REVISION_MIN,
+	.revision_max	= REVISION_MAX,
 	.create		= bitmap_ipmac_create,
 	.create_policy	= {
 		[IPSET_ATTR_IP]		= { .type = NLA_NESTED },
diff --git a/net/netfilter/ipset/ip_set_bitmap_port.c b/net/netfilter/ipset/ip_set_bitmap_port.c
index b9f1fce7053b..e6b2db76f4c3 100644
--- a/net/netfilter/ipset/ip_set_bitmap_port.c
+++ b/net/netfilter/ipset/ip_set_bitmap_port.c
@@ -22,9 +22,12 @@
 #define IP_SET_BITMAP_TIMEOUT
 #include <linux/netfilter/ipset/ip_set_timeout.h>
 
+#define REVISION_MIN	0
+#define REVISION_MAX	0
+
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
-MODULE_DESCRIPTION("bitmap:port type of IP sets");
+IP_SET_MODULE_DESC("bitmap:port", REVISION_MIN, REVISION_MAX);
 MODULE_ALIAS("ip_set_bitmap:port");
 
 /* Type structure */
@@ -487,8 +490,8 @@ static struct ip_set_type bitmap_port_type = {
 	.features	= IPSET_TYPE_PORT,
 	.dimension	= IPSET_DIM_ONE,
 	.family		= NFPROTO_UNSPEC,
-	.revision_min	= 0,
-	.revision_max	= 0,
+	.revision_min	= REVISION_MIN,
+	.revision_max	= REVISION_MAX,
 	.create		= bitmap_port_create,
 	.create_policy	= {
 		[IPSET_ATTR_PORT]	= { .type = NLA_U16 },
diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c
index bc8f76edc260..ec3dba5dcd62 100644
--- a/net/netfilter/ipset/ip_set_hash_ip.c
+++ b/net/netfilter/ipset/ip_set_hash_ip.c
@@ -24,9 +24,12 @@
 #include <linux/netfilter/ipset/ip_set_timeout.h>
 #include <linux/netfilter/ipset/ip_set_hash.h>
 
+#define REVISION_MIN	0
+#define REVISION_MAX	0
+
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
-MODULE_DESCRIPTION("hash:ip type of IP sets");
+IP_SET_MODULE_DESC("hash:ip", REVISION_MIN, REVISION_MAX);
 MODULE_ALIAS("ip_set_hash:ip");
 
 /* Type specific function prefix */
@@ -452,8 +455,8 @@ static struct ip_set_type hash_ip_type __read_mostly = {
 	.features	= IPSET_TYPE_IP,
 	.dimension	= IPSET_DIM_ONE,
 	.family		= NFPROTO_UNSPEC,
-	.revision_min	= 0,
-	.revision_max	= 0,
+	.revision_min	= REVISION_MIN,
+	.revision_max	= REVISION_MAX,
 	.create		= hash_ip_create,
 	.create_policy	= {
 		[IPSET_ATTR_HASHSIZE]	= { .type = NLA_U32 },
diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c
index 6760fd4d7858..0171f7502fa5 100644
--- a/net/netfilter/ipset/ip_set_hash_ipport.c
+++ b/net/netfilter/ipset/ip_set_hash_ipport.c
@@ -25,9 +25,12 @@
 #include <linux/netfilter/ipset/ip_set_getport.h>
 #include <linux/netfilter/ipset/ip_set_hash.h>
 
+#define REVISION_MIN	0
+#define REVISION_MAX	1 /* SCTP and UDPLITE support added */
+
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
-MODULE_DESCRIPTION("hash:ip,port type of IP sets");
+IP_SET_MODULE_DESC("hash:ip,port", REVISION_MIN, REVISION_MAX);
 MODULE_ALIAS("ip_set_hash:ip,port");
 
 /* Type specific function prefix */
@@ -523,8 +526,8 @@ static struct ip_set_type hash_ipport_type __read_mostly = {
 	.features	= IPSET_TYPE_IP | IPSET_TYPE_PORT,
 	.dimension	= IPSET_DIM_TWO,
 	.family		= NFPROTO_UNSPEC,
-	.revision_min	= 0,
-	.revision_max	= 1,	/* SCTP and UDPLITE support added */
+	.revision_min	= REVISION_MIN,
+	.revision_max	= REVISION_MAX,
 	.create		= hash_ipport_create,
 	.create_policy	= {
 		[IPSET_ATTR_HASHSIZE]	= { .type = NLA_U32 },
diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c
index ac09bec274f1..6344ef551ec8 100644
--- a/net/netfilter/ipset/ip_set_hash_ipportip.c
+++ b/net/netfilter/ipset/ip_set_hash_ipportip.c
@@ -25,9 +25,12 @@
 #include <linux/netfilter/ipset/ip_set_getport.h>
 #include <linux/netfilter/ipset/ip_set_hash.h>
 
+#define REVISION_MIN	0
+#define REVISION_MAX	1 /* SCTP and UDPLITE support added */
+
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
-MODULE_DESCRIPTION("hash:ip,port,ip type of IP sets");
+IP_SET_MODULE_DESC("hash:ip,port,ip", REVISION_MIN, REVISION_MAX);
 MODULE_ALIAS("ip_set_hash:ip,port,ip");
 
 /* Type specific function prefix */
@@ -541,8 +544,8 @@ static struct ip_set_type hash_ipportip_type __read_mostly = {
 	.features	= IPSET_TYPE_IP | IPSET_TYPE_PORT | IPSET_TYPE_IP2,
 	.dimension	= IPSET_DIM_THREE,
 	.family		= NFPROTO_UNSPEC,
-	.revision_min	= 0,
-	.revision_max	= 1,	/* SCTP and UDPLITE support added */
+	.revision_min	= REVISION_MIN,
+	.revision_max	= REVISION_MAX,
 	.create		= hash_ipportip_create,
 	.create_policy	= {
 		[IPSET_ATTR_HASHSIZE]	= { .type = NLA_U32 },
diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c
index 242814e4db4e..8ee916875a23 100644
--- a/net/netfilter/ipset/ip_set_hash_ipportnet.c
+++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c
@@ -25,9 +25,14 @@
 #include <linux/netfilter/ipset/ip_set_getport.h>
 #include <linux/netfilter/ipset/ip_set_hash.h>
 
+#define REVISION_MIN	0
+/*			1    SCTP and UDPLITE support added */
+/*			2    Range as input support for IPv4 added */
+#define REVISION_MAX	3 /* nomatch flag support added */
+
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
-MODULE_DESCRIPTION("hash:ip,port,net type of IP sets");
+IP_SET_MODULE_DESC("hash:ip,port,net", REVISION_MIN, REVISION_MAX);
 MODULE_ALIAS("ip_set_hash:ip,port,net");
 
 /* Type specific function prefix */
@@ -695,10 +700,8 @@ static struct ip_set_type hash_ipportnet_type __read_mostly = {
 	.features	= IPSET_TYPE_IP | IPSET_TYPE_PORT | IPSET_TYPE_IP2,
 	.dimension	= IPSET_DIM_THREE,
 	.family		= NFPROTO_UNSPEC,
-	.revision_min	= 0,
-	/*		  1	   SCTP and UDPLITE support added */
-	/*		  2	   Range as input support for IPv4 added */
-	.revision_max	= 3,	/* nomatch flag support added */
+	.revision_min	= REVISION_MIN,
+	.revision_max	= REVISION_MAX,
 	.create		= hash_ipportnet_create,
 	.create_policy	= {
 		[IPSET_ATTR_HASHSIZE]	= { .type = NLA_U32 },
diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c
index d676093822b1..014ff7272f7b 100644
--- a/net/netfilter/ipset/ip_set_hash_net.c
+++ b/net/netfilter/ipset/ip_set_hash_net.c
@@ -23,9 +23,13 @@
 #include <linux/netfilter/ipset/ip_set_timeout.h>
 #include <linux/netfilter/ipset/ip_set_hash.h>
 
+#define REVISION_MIN	0
+/*			1    Range as input support for IPv4 added */
+#define REVISION_MAX	2 /* nomatch flag support added */
+
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
-MODULE_DESCRIPTION("hash:net type of IP sets");
+IP_SET_MODULE_DESC("hash:net", REVISION_MIN, REVISION_MAX);
 MODULE_ALIAS("ip_set_hash:net");
 
 /* Type specific function prefix */
@@ -535,9 +539,8 @@ static struct ip_set_type hash_net_type __read_mostly = {
 	.features	= IPSET_TYPE_IP,
 	.dimension	= IPSET_DIM_ONE,
 	.family		= NFPROTO_UNSPEC,
-	.revision_min	= 0,
-	/*		= 1 	   Range as input support for IPv4 added */
-	.revision_max	= 2,	/* nomatch flag support added */
+	.revision_min	= REVISION_MIN,
+	.revision_max	= REVISION_MAX,
 	.create		= hash_net_create,
 	.create_policy	= {
 		[IPSET_ATTR_HASHSIZE]	= { .type = NLA_U32 },
diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c
index e7c671dd3cce..a5c8491d265e 100644
--- a/net/netfilter/ipset/ip_set_hash_netiface.c
+++ b/net/netfilter/ipset/ip_set_hash_netiface.c
@@ -24,9 +24,13 @@
 #include <linux/netfilter/ipset/ip_set_timeout.h>
 #include <linux/netfilter/ipset/ip_set_hash.h>
 
+#define REVISION_MIN	0
+/*			1    nomatch flag support added */
+#define REVISION_MAX	2 /* /0 support added */
+
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
-MODULE_DESCRIPTION("hash:net,iface type of IP sets");
+IP_SET_MODULE_DESC("hash:net,iface", REVISION_MIN, REVISION_MAX);
 MODULE_ALIAS("ip_set_hash:net,iface");
 
 /* Interface name rbtree */
@@ -773,9 +777,8 @@ static struct ip_set_type hash_netiface_type __read_mostly = {
 	.features	= IPSET_TYPE_IP | IPSET_TYPE_IFACE,
 	.dimension	= IPSET_DIM_TWO,
 	.family		= NFPROTO_UNSPEC,
-	.revision_min	= 0,
-	/*		= 1,	   nomatch flag support added */
-	.revision_max	= 2,	/* /0 support added */
+	.revision_min	= REVISION_MIN,
+	.revision_max	= REVISION_MAX,
 	.create		= hash_netiface_create,
 	.create_policy	= {
 		[IPSET_ATTR_HASHSIZE]	= { .type = NLA_U32 },
diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c
index 3ec27fccddd7..7ca357a62b1c 100644
--- a/net/netfilter/ipset/ip_set_hash_netport.c
+++ b/net/netfilter/ipset/ip_set_hash_netport.c
@@ -24,9 +24,14 @@
 #include <linux/netfilter/ipset/ip_set_getport.h>
 #include <linux/netfilter/ipset/ip_set_hash.h>
 
+#define REVISION_MIN	0
+/*			1    SCTP and UDPLITE support added */
+/*			2    Range as input support for IPv4 added */
+#define REVISION_MAX	3 /* nomatch flag support added */
+
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
-MODULE_DESCRIPTION("hash:net,port type of IP sets");
+IP_SET_MODULE_DESC("hash:net,port", REVISION_MIN, REVISION_MAX);
 MODULE_ALIAS("ip_set_hash:net,port");
 
 /* Type specific function prefix */
@@ -648,10 +653,8 @@ static struct ip_set_type hash_netport_type __read_mostly = {
 	.features	= IPSET_TYPE_IP | IPSET_TYPE_PORT,
 	.dimension	= IPSET_DIM_TWO,
 	.family		= NFPROTO_UNSPEC,
-	.revision_min	= 0,
-	/*		  1	   SCTP and UDPLITE support added */
-	/*		  2,	   Range as input support for IPv4 added */
-	.revision_max	= 3,	/* nomatch flag support added */
+	.revision_min	= REVISION_MIN,
+	.revision_max	= REVISION_MAX,
 	.create		= hash_netport_create,
 	.create_policy	= {
 		[IPSET_ATTR_HASHSIZE]	= { .type = NLA_U32 },
diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c
index 6cb1225765f9..8371c2bac2e4 100644
--- a/net/netfilter/ipset/ip_set_list_set.c
+++ b/net/netfilter/ipset/ip_set_list_set.c
@@ -16,9 +16,12 @@
 #include <linux/netfilter/ipset/ip_set_timeout.h>
 #include <linux/netfilter/ipset/ip_set_list.h>
 
+#define REVISION_MIN	0
+#define REVISION_MAX	0
+
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
-MODULE_DESCRIPTION("list:set type of IP sets");
+IP_SET_MODULE_DESC("list:set", REVISION_MIN, REVISION_MAX);
 MODULE_ALIAS("ip_set_list:set");
 
 /* Member elements without and with timeout */
@@ -579,8 +582,8 @@ static struct ip_set_type list_set_type __read_mostly = {
 	.features	= IPSET_TYPE_NAME | IPSET_DUMP_LAST,
 	.dimension	= IPSET_DIM_ONE,
 	.family		= NFPROTO_UNSPEC,
-	.revision_min	= 0,
-	.revision_max	= 0,
+	.revision_min	= REVISION_MIN,
+	.revision_max	= REVISION_MAX,
 	.create		= list_set_create,
 	.create_policy	= {
 		[IPSET_ATTR_SIZE]	= { .type = NLA_U32 },
-- 
cgit v1.2.3


From 3ace95c0ac125a042cfb682d0a9bbdbf1e5a2c65 Mon Sep 17 00:00:00 2001
From: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Date: Fri, 21 Sep 2012 22:01:45 +0200
Subject: netfilter: ipset: Coding style fixes

Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
---
 include/linux/netfilter/ipset/ip_set.h       | 5 +++--
 include/linux/netfilter/ipset/ip_set_ahash.h | 2 +-
 net/netfilter/ipset/ip_set_bitmap_ipmac.c    | 4 ++--
 net/netfilter/ipset/ip_set_core.c            | 9 ++++++---
 4 files changed, 12 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h
index 1d954c6c4e08..0c1e97b3acfb 100644
--- a/include/linux/netfilter/ipset/ip_set.h
+++ b/include/linux/netfilter/ipset/ip_set.h
@@ -255,7 +255,7 @@ struct ip_set_type_variant {
 	 *		returns negative error code,
 	 *			zero for no match/success to add/delete
 	 *			positive for matching element */
-	int (*kadt)(struct ip_set *set, const struct sk_buff * skb,
+	int (*kadt)(struct ip_set *set, const struct sk_buff *skb,
 		    const struct xt_action_param *par,
 		    enum ipset_adt adt, const struct ip_set_adt_opt *opt);
 
@@ -430,7 +430,8 @@ static inline int nla_put_ipaddr4(struct sk_buff *skb, int type, __be32 ipaddr)
 	return ret;
 }
 
-static inline int nla_put_ipaddr6(struct sk_buff *skb, int type, const struct in6_addr *ipaddrptr)
+static inline int nla_put_ipaddr6(struct sk_buff *skb, int type,
+				  const struct in6_addr *ipaddrptr)
 {
 	struct nlattr *__nested = ipset_nest_start(skb, type);
 	int ret;
diff --git a/include/linux/netfilter/ipset/ip_set_ahash.h b/include/linux/netfilter/ipset/ip_set_ahash.h
index 495acdf7251f..ef9acd3c8450 100644
--- a/include/linux/netfilter/ipset/ip_set_ahash.h
+++ b/include/linux/netfilter/ipset/ip_set_ahash.h
@@ -696,7 +696,7 @@ nla_put_failure:
 }
 
 static int
-type_pf_kadt(struct ip_set *set, const struct sk_buff * skb,
+type_pf_kadt(struct ip_set *set, const struct sk_buff *skb,
 	     const struct xt_action_param *par,
 	     enum ipset_adt adt, const struct ip_set_adt_opt *opt);
 static int
diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
index 645c9d13c194..0f92dc24cb89 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
@@ -323,11 +323,11 @@ bitmap_ipmac_tlist(const struct ip_set *set,
 		    (elem->match == MAC_FILLED &&
 		     nla_put(skb, IPSET_ATTR_ETHER, ETH_ALEN,
 			     elem->ether)))
-		    goto nla_put_failure;
+			goto nla_put_failure;
 		timeout = elem->match == MAC_UNSET ? elem->timeout
 				: ip_set_timeout_get(elem->timeout);
 		if (nla_put_net32(skb, IPSET_ATTR_TIMEOUT, htonl(timeout)))
-		    goto nla_put_failure;
+			goto nla_put_failure;
 		ipset_nest_end(skb, nested);
 	}
 	ipset_nest_end(skb, atd);
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index ad39ef406851..72e9bf0ef90d 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -69,7 +69,8 @@ find_set_type(const char *name, u8 family, u8 revision)
 
 	list_for_each_entry_rcu(type, &ip_set_type_list, list)
 		if (STREQ(type->name, name) &&
-		    (type->family == family || type->family == NFPROTO_UNSPEC) &&
+		    (type->family == family ||
+		     type->family == NFPROTO_UNSPEC) &&
 		    revision >= type->revision_min &&
 		    revision <= type->revision_max)
 			return type;
@@ -149,7 +150,8 @@ __find_set_type_minmax(const char *name, u8 family, u8 *min, u8 *max,
 	rcu_read_lock();
 	list_for_each_entry_rcu(type, &ip_set_type_list, list)
 		if (STREQ(type->name, name) &&
-		    (type->family == family || type->family == NFPROTO_UNSPEC)) {
+		    (type->family == family ||
+		     type->family == NFPROTO_UNSPEC)) {
 			found = true;
 			if (type->revision_min < *min)
 				*min = type->revision_min;
@@ -721,7 +723,8 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb,
 	 * by the nfnl mutex. Find the first free index in ip_set_list
 	 * and check clashing.
 	 */
-	if ((ret = find_free_id(set->name, &index, &clash)) != 0) {
+	ret = find_free_id(set->name, &index, &clash);
+	if (ret != 0) {
 		/* If this is the same set and requested, ignore error */
 		if (ret == -EEXIST &&
 		    (flags & IPSET_FLAG_EXIST) &&
-- 
cgit v1.2.3


From 3e0304a583d72c747caa8afac76b8d514aa293f5 Mon Sep 17 00:00:00 2001
From: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Date: Fri, 21 Sep 2012 22:02:36 +0200
Subject: netfilter: ipset: Support to match elements marked with "nomatch"

Exceptions can now be matched and we can branch according to the
possible cases:

a. match in the set if the element is not flagged as "nomatch"
b. match in the set if the element is flagged with "nomatch"
c. no match

i.e.

iptables ... -m set --match-set ... -j ...
iptables ... -m set --match-set ... --nomatch-entries -j ...
...

Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
---
 include/linux/netfilter/ipset/ip_set.h      |  4 ++++
 net/netfilter/ipset/ip_set_core.c           |  6 ++++++
 net/netfilter/ipset/ip_set_hash_ipportnet.c | 11 ++++++-----
 net/netfilter/ipset/ip_set_hash_net.c       | 10 +++++-----
 net/netfilter/ipset/ip_set_hash_netiface.c  | 11 ++++++-----
 net/netfilter/ipset/ip_set_hash_netport.c   | 10 +++++-----
 net/netfilter/xt_set.c                      | 22 ++++++++++++++++++++++
 7 files changed, 54 insertions(+), 20 deletions(-)

(limited to 'include')

diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h
index 0c1e97b3acfb..528697b3c152 100644
--- a/include/linux/netfilter/ipset/ip_set.h
+++ b/include/linux/netfilter/ipset/ip_set.h
@@ -190,6 +190,7 @@ enum ip_set_dim {
 	 * If changed, new revision of iptables match/target is required.
 	 */
 	IPSET_DIM_MAX = 6,
+	IPSET_BIT_RETURN_NOMATCH = 7,
 };
 
 /* Option flags for kernel operations */
@@ -198,6 +199,7 @@ enum ip_set_kopt {
 	IPSET_DIM_ONE_SRC = (1 << IPSET_DIM_ONE),
 	IPSET_DIM_TWO_SRC = (1 << IPSET_DIM_TWO),
 	IPSET_DIM_THREE_SRC = (1 << IPSET_DIM_THREE),
+	IPSET_RETURN_NOMATCH = (1 << IPSET_BIT_RETURN_NOMATCH),
 };
 
 #ifdef __KERNEL__
@@ -229,6 +231,8 @@ enum ip_set_feature {
 	IPSET_TYPE_NAME = (1 << IPSET_TYPE_NAME_FLAG),
 	IPSET_TYPE_IFACE_FLAG = 5,
 	IPSET_TYPE_IFACE = (1 << IPSET_TYPE_IFACE_FLAG),
+	IPSET_TYPE_NOMATCH_FLAG = 6,
+	IPSET_TYPE_NOMATCH = (1 << IPSET_TYPE_NOMATCH_FLAG),
 	/* Strictly speaking not a feature, but a flag for dumping:
 	 * this settype must be dumped last */
 	IPSET_DUMP_LAST_FLAG = 7,
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index 72e9bf0ef90d..778465f217fa 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -370,6 +370,12 @@ ip_set_test(ip_set_id_t index, const struct sk_buff *skb,
 		set->variant->kadt(set, skb, par, IPSET_ADD, opt);
 		write_unlock_bh(&set->lock);
 		ret = 1;
+	} else {
+		/* --return-nomatch: invert matched element */
+		if ((opt->flags & IPSET_RETURN_NOMATCH) &&
+		    (set->type->features & IPSET_TYPE_NOMATCH) &&
+		    (ret > 0 || ret == -ENOTEMPTY))
+			ret = -ret;
 	}
 
 	/* Convert error codes to nomatch */
diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c
index 8ee916875a23..cb71f9a774e7 100644
--- a/net/netfilter/ipset/ip_set_hash_ipportnet.c
+++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c
@@ -104,10 +104,10 @@ hash_ipportnet4_data_flags(struct hash_ipportnet4_elem *dst, u32 flags)
 	dst->nomatch = !!(flags & IPSET_FLAG_NOMATCH);
 }
 
-static inline bool
+static inline int
 hash_ipportnet4_data_match(const struct hash_ipportnet4_elem *elem)
 {
-	return !elem->nomatch;
+	return elem->nomatch ? -ENOTEMPTY : 1;
 }
 
 static inline void
@@ -411,10 +411,10 @@ hash_ipportnet6_data_flags(struct hash_ipportnet6_elem *dst, u32 flags)
 	dst->nomatch = !!(flags & IPSET_FLAG_NOMATCH);
 }
 
-static inline bool
+static inline int
 hash_ipportnet6_data_match(const struct hash_ipportnet6_elem *elem)
 {
-	return !elem->nomatch;
+	return elem->nomatch ? -ENOTEMPTY : 1;
 }
 
 static inline void
@@ -697,7 +697,8 @@ hash_ipportnet_create(struct ip_set *set, struct nlattr *tb[], u32 flags)
 static struct ip_set_type hash_ipportnet_type __read_mostly = {
 	.name		= "hash:ip,port,net",
 	.protocol	= IPSET_PROTOCOL,
-	.features	= IPSET_TYPE_IP | IPSET_TYPE_PORT | IPSET_TYPE_IP2,
+	.features	= IPSET_TYPE_IP | IPSET_TYPE_PORT | IPSET_TYPE_IP2 |
+			  IPSET_TYPE_NOMATCH,
 	.dimension	= IPSET_DIM_THREE,
 	.family		= NFPROTO_UNSPEC,
 	.revision_min	= REVISION_MIN,
diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c
index 014ff7272f7b..29e94b981f3f 100644
--- a/net/netfilter/ipset/ip_set_hash_net.c
+++ b/net/netfilter/ipset/ip_set_hash_net.c
@@ -90,10 +90,10 @@ hash_net4_data_flags(struct hash_net4_elem *dst, u32 flags)
 	dst->nomatch = flags & IPSET_FLAG_NOMATCH;
 }
 
-static inline bool
+static inline int
 hash_net4_data_match(const struct hash_net4_elem *elem)
 {
-	return !elem->nomatch;
+	return elem->nomatch ? -ENOTEMPTY : 1;
 }
 
 static inline void
@@ -311,10 +311,10 @@ hash_net6_data_flags(struct hash_net6_elem *dst, u32 flags)
 	dst->nomatch = flags & IPSET_FLAG_NOMATCH;
 }
 
-static inline bool
+static inline int
 hash_net6_data_match(const struct hash_net6_elem *elem)
 {
-	return !elem->nomatch;
+	return elem->nomatch ? -ENOTEMPTY : 1;
 }
 
 static inline void
@@ -536,7 +536,7 @@ hash_net_create(struct ip_set *set, struct nlattr *tb[], u32 flags)
 static struct ip_set_type hash_net_type __read_mostly = {
 	.name		= "hash:net",
 	.protocol	= IPSET_PROTOCOL,
-	.features	= IPSET_TYPE_IP,
+	.features	= IPSET_TYPE_IP | IPSET_TYPE_NOMATCH,
 	.dimension	= IPSET_DIM_ONE,
 	.family		= NFPROTO_UNSPEC,
 	.revision_min	= REVISION_MIN,
diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c
index a5c8491d265e..b9a63381e349 100644
--- a/net/netfilter/ipset/ip_set_hash_netiface.c
+++ b/net/netfilter/ipset/ip_set_hash_netiface.c
@@ -201,10 +201,10 @@ hash_netiface4_data_flags(struct hash_netiface4_elem *dst, u32 flags)
 	dst->nomatch = flags & IPSET_FLAG_NOMATCH;
 }
 
-static inline bool
+static inline int
 hash_netiface4_data_match(const struct hash_netiface4_elem *elem)
 {
-	return !elem->nomatch;
+	return elem->nomatch ? -ENOTEMPTY : 1;
 }
 
 static inline void
@@ -497,10 +497,10 @@ hash_netiface6_data_flags(struct hash_netiface6_elem *dst, u32 flags)
 	dst->nomatch = flags & IPSET_FLAG_NOMATCH;
 }
 
-static inline bool
+static inline int
 hash_netiface6_data_match(const struct hash_netiface6_elem *elem)
 {
-	return !elem->nomatch;
+	return elem->nomatch ? -ENOTEMPTY : 1;
 }
 
 static inline void
@@ -774,7 +774,8 @@ hash_netiface_create(struct ip_set *set, struct nlattr *tb[], u32 flags)
 static struct ip_set_type hash_netiface_type __read_mostly = {
 	.name		= "hash:net,iface",
 	.protocol	= IPSET_PROTOCOL,
-	.features	= IPSET_TYPE_IP | IPSET_TYPE_IFACE,
+	.features	= IPSET_TYPE_IP | IPSET_TYPE_IFACE |
+			  IPSET_TYPE_NOMATCH,
 	.dimension	= IPSET_DIM_TWO,
 	.family		= NFPROTO_UNSPEC,
 	.revision_min	= REVISION_MIN,
diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c
index 7ca357a62b1c..7ef700de596c 100644
--- a/net/netfilter/ipset/ip_set_hash_netport.c
+++ b/net/netfilter/ipset/ip_set_hash_netport.c
@@ -104,10 +104,10 @@ hash_netport4_data_flags(struct hash_netport4_elem *dst, u32 flags)
 	dst->nomatch = !!(flags & IPSET_FLAG_NOMATCH);
 }
 
-static inline bool
+static inline int
 hash_netport4_data_match(const struct hash_netport4_elem *elem)
 {
-	return !elem->nomatch;
+	return elem->nomatch ? -ENOTEMPTY : 1;
 }
 
 static inline void
@@ -375,10 +375,10 @@ hash_netport6_data_flags(struct hash_netport6_elem *dst, u32 flags)
 	dst->nomatch = !!(flags & IPSET_FLAG_NOMATCH);
 }
 
-static inline bool
+static inline int
 hash_netport6_data_match(const struct hash_netport6_elem *elem)
 {
-	return !elem->nomatch;
+	return elem->nomatch ? -ENOTEMPTY : 1;
 }
 
 static inline void
@@ -650,7 +650,7 @@ hash_netport_create(struct ip_set *set, struct nlattr *tb[], u32 flags)
 static struct ip_set_type hash_netport_type __read_mostly = {
 	.name		= "hash:net,port",
 	.protocol	= IPSET_PROTOCOL,
-	.features	= IPSET_TYPE_IP | IPSET_TYPE_PORT,
+	.features	= IPSET_TYPE_IP | IPSET_TYPE_PORT | IPSET_TYPE_NOMATCH,
 	.dimension	= IPSET_DIM_TWO,
 	.family		= NFPROTO_UNSPEC,
 	.revision_min	= REVISION_MIN,
diff --git a/net/netfilter/xt_set.c b/net/netfilter/xt_set.c
index c6f7db720d84..865a9e54f3ad 100644
--- a/net/netfilter/xt_set.c
+++ b/net/netfilter/xt_set.c
@@ -356,6 +356,27 @@ static struct xt_match set_matches[] __read_mostly = {
 		.destroy	= set_match_v1_destroy,
 		.me		= THIS_MODULE
 	},
+	/* --return-nomatch flag support */
+	{
+		.name		= "set",
+		.family		= NFPROTO_IPV4,
+		.revision	= 2,
+		.match		= set_match_v1,
+		.matchsize	= sizeof(struct xt_set_info_match_v1),
+		.checkentry	= set_match_v1_checkentry,
+		.destroy	= set_match_v1_destroy,
+		.me		= THIS_MODULE
+	},
+	{
+		.name		= "set",
+		.family		= NFPROTO_IPV6,
+		.revision	= 2,
+		.match		= set_match_v1,
+		.matchsize	= sizeof(struct xt_set_info_match_v1),
+		.checkentry	= set_match_v1_checkentry,
+		.destroy	= set_match_v1_destroy,
+		.me		= THIS_MODULE
+	},
 };
 
 static struct xt_target set_targets[] __read_mostly = {
@@ -389,6 +410,7 @@ static struct xt_target set_targets[] __read_mostly = {
 		.destroy	= set_target_v1_destroy,
 		.me		= THIS_MODULE
 	},
+	/* --timeout and --exist flags support */
 	{
 		.name		= "SET",
 		.revision	= 2,
-- 
cgit v1.2.3


From c9d2ea96ca3bbc85264803ff6bd66eb3bbefdb77 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Sun, 23 Sep 2012 02:09:23 -0400
Subject: netlink: Rearrange netlink_kernel_cfg to save space on 64-bit.

Suggested by Jan Engelhardt.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netlink.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/netlink.h b/include/linux/netlink.h
index b3dc992fb25b..f80c56ac4d82 100644
--- a/include/linux/netlink.h
+++ b/include/linux/netlink.h
@@ -183,10 +183,10 @@ extern void netlink_table_ungrab(void);
 /* optional Netlink kernel configuration parameters */
 struct netlink_kernel_cfg {
 	unsigned int	groups;
+	unsigned int	flags;
 	void		(*input)(struct sk_buff *skb);
 	struct mutex	*cb_mutex;
 	void		(*bind)(int group);
-	unsigned int	flags;
 };
 
 extern struct sock *__netlink_kernel_create(struct net *net, int unit,
-- 
cgit v1.2.3


From 54eb3df3a7d01b6cd395bdc1098280f2f93fbec5 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Mon, 17 Sep 2012 00:23:09 +0000
Subject: netfilter: xt_time: add support to ignore day transition

Currently, if you want to do something like:
"match Monday, starting 23:00, for two hours"
You need two rules, one for Mon 23:00 to 0:00 and one for Tue 0:00-1:00.

The rule: --weekdays Mo --timestart 23:00  --timestop 01:00

looks correct, but it will first match on monday from midnight to 1 a.m.
and then again for another hour from 23:00 onwards.

This permits userspace to explicitly ignore the day transition and
match for a single, continuous time period instead.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/xt_time.h |  5 +++++
 net/netfilter/xt_time.c           | 24 +++++++++++++++++++++++-
 2 files changed, 28 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/netfilter/xt_time.h b/include/linux/netfilter/xt_time.h
index 7c37fac576c4..095886019396 100644
--- a/include/linux/netfilter/xt_time.h
+++ b/include/linux/netfilter/xt_time.h
@@ -17,6 +17,9 @@ enum {
 	/* Match against local time (instead of UTC) */
 	XT_TIME_LOCAL_TZ = 1 << 0,
 
+	/* treat timestart > timestop (e.g. 23:00-01:00) as single period */
+	XT_TIME_CONTIGUOUS = 1 << 1,
+
 	/* Shortcuts */
 	XT_TIME_ALL_MONTHDAYS = 0xFFFFFFFE,
 	XT_TIME_ALL_WEEKDAYS  = 0xFE,
@@ -24,4 +27,6 @@ enum {
 	XT_TIME_MAX_DAYTIME   = 24 * 60 * 60 - 1,
 };
 
+#define XT_TIME_ALL_FLAGS (XT_TIME_LOCAL_TZ|XT_TIME_CONTIGUOUS)
+
 #endif /* _XT_TIME_H */
diff --git a/net/netfilter/xt_time.c b/net/netfilter/xt_time.c
index c48975ff8ea2..0ae55a36f492 100644
--- a/net/netfilter/xt_time.c
+++ b/net/netfilter/xt_time.c
@@ -42,6 +42,7 @@ static const u_int16_t days_since_leapyear[] = {
  */
 enum {
 	DSE_FIRST = 2039,
+	SECONDS_PER_DAY = 86400,
 };
 static const u_int16_t days_since_epoch[] = {
 	/* 2039 - 2030 */
@@ -78,7 +79,7 @@ static inline unsigned int localtime_1(struct xtm *r, time_t time)
 	unsigned int v, w;
 
 	/* Each day has 86400s, so finding the hour/minute is actually easy. */
-	v         = time % 86400;
+	v         = time % SECONDS_PER_DAY;
 	r->second = v % 60;
 	w         = v / 60;
 	r->minute = w % 60;
@@ -199,6 +200,18 @@ time_mt(const struct sk_buff *skb, struct xt_action_param *par)
 		if (packet_time < info->daytime_start &&
 		    packet_time > info->daytime_stop)
 			return false;
+
+		/** if user asked to ignore 'next day', then e.g.
+		 *  '1 PM Wed, August 1st' should be treated
+		 *  like 'Tue 1 PM July 31st'.
+		 *
+		 * This also causes
+		 * 'Monday, "23:00 to 01:00", to match for 2 hours, starting
+		 * Monday 23:00 to Tuesday 01:00.
+		 */
+		if ((info->flags & XT_TIME_CONTIGUOUS) &&
+		     packet_time <= info->daytime_stop)
+			stamp -= SECONDS_PER_DAY;
 	}
 
 	localtime_2(&current_time, stamp);
@@ -227,6 +240,15 @@ static int time_mt_check(const struct xt_mtchk_param *par)
 		return -EDOM;
 	}
 
+	if (info->flags & ~XT_TIME_ALL_FLAGS) {
+		pr_info("unknown flags 0x%x\n", info->flags & ~XT_TIME_ALL_FLAGS);
+		return -EINVAL;
+	}
+
+	if ((info->flags & XT_TIME_CONTIGUOUS) &&
+	     info->daytime_start < info->daytime_stop)
+		return -EINVAL;
+
 	return 0;
 }
 
-- 
cgit v1.2.3


From 7be54ca4764bdead40bee7b645a72718c20ff2c8 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Fri, 21 Sep 2012 16:52:08 +0200
Subject: netfilter: nf_ct_ftp: add sequence tracking pickup facility for
 injected entries

This patch allows the FTP helper to pickup the sequence tracking from
the first packet seen. This is useful to fix the breakage of the first
FTP command after the failover while using conntrackd to synchronize
states.

The seq_aft_nl_num field in struct nf_ct_ftp_info has been shrinked to
16-bits (enough for what it does), so we can use the remaining 16-bits
to store the flags while using the same size for the private FTP helper
data.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/nf_conntrack_ftp.h |  6 +++++-
 net/netfilter/nf_conntrack_ftp.c           | 21 +++++++++++++++++++++
 net/netfilter/nf_conntrack_netlink.c       |  4 ++--
 net/netfilter/nfnetlink_cthelper.c         |  3 +++
 4 files changed, 31 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/netfilter/nf_conntrack_ftp.h b/include/linux/netfilter/nf_conntrack_ftp.h
index 28f18df36525..8faf3f792d13 100644
--- a/include/linux/netfilter/nf_conntrack_ftp.h
+++ b/include/linux/netfilter/nf_conntrack_ftp.h
@@ -18,13 +18,17 @@ enum nf_ct_ftp_type {
 
 #define FTP_PORT	21
 
+#define NF_CT_FTP_SEQ_PICKUP	(1 << 0)
+
 #define NUM_SEQ_TO_REMEMBER 2
 /* This structure exists only once per master */
 struct nf_ct_ftp_master {
 	/* Valid seq positions for cmd matching after newline */
 	u_int32_t seq_aft_nl[IP_CT_DIR_MAX][NUM_SEQ_TO_REMEMBER];
 	/* 0 means seq_match_aft_nl not set */
-	int seq_aft_nl_num[IP_CT_DIR_MAX];
+	u_int16_t seq_aft_nl_num[IP_CT_DIR_MAX];
+	/* pickup sequence tracking, useful for conntrackd */
+	u_int16_t flags[IP_CT_DIR_MAX];
 };
 
 struct nf_conntrack_expect;
diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c
index f8cc26ad4456..1ce3befb7c8a 100644
--- a/net/netfilter/nf_conntrack_ftp.c
+++ b/net/netfilter/nf_conntrack_ftp.c
@@ -396,6 +396,12 @@ static int help(struct sk_buff *skb,
 
 	/* Look up to see if we're just after a \n. */
 	if (!find_nl_seq(ntohl(th->seq), ct_ftp_info, dir)) {
+		/* We're picking up this, clear flags and let it continue */
+		if (unlikely(ct_ftp_info->flags[dir] & NF_CT_FTP_SEQ_PICKUP)) {
+			ct_ftp_info->flags[dir] ^= NF_CT_FTP_SEQ_PICKUP;
+			goto skip_nl_seq;
+		}
+
 		/* Now if this ends in \n, update ftp info. */
 		pr_debug("nf_conntrack_ftp: wrong seq pos %s(%u) or %s(%u)\n",
 			 ct_ftp_info->seq_aft_nl_num[dir] > 0 ? "" : "(UNSET)",
@@ -406,6 +412,7 @@ static int help(struct sk_buff *skb,
 		goto out_update_nl;
 	}
 
+skip_nl_seq:
 	/* Initialize IP/IPv6 addr to expected address (it's not mentioned
 	   in EPSV responses) */
 	cmd.l3num = nf_ct_l3num(ct);
@@ -512,6 +519,19 @@ out_update_nl:
 	return ret;
 }
 
+static int nf_ct_ftp_from_nlattr(struct nlattr *attr, struct nf_conn *ct)
+{
+	struct nf_ct_ftp_master *ftp = nfct_help_data(ct);
+
+	/* This conntrack has been injected from user-space, always pick up
+	 * sequence tracking. Otherwise, the first FTP command after the
+	 * failover breaks.
+	 */
+	ftp->flags[IP_CT_DIR_ORIGINAL] |= NF_CT_FTP_SEQ_PICKUP;
+	ftp->flags[IP_CT_DIR_REPLY] |= NF_CT_FTP_SEQ_PICKUP;
+	return 0;
+}
+
 static struct nf_conntrack_helper ftp[MAX_PORTS][2] __read_mostly;
 
 static const struct nf_conntrack_expect_policy ftp_exp_policy = {
@@ -561,6 +581,7 @@ static int __init nf_conntrack_ftp_init(void)
 			ftp[i][j].expect_policy = &ftp_exp_policy;
 			ftp[i][j].me = THIS_MODULE;
 			ftp[i][j].help = help;
+			ftp[i][j].from_nlattr = nf_ct_ftp_from_nlattr;
 			if (ports[i] == FTP_PORT)
 				sprintf(ftp[i][j].name, "ftp");
 			else
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 2dcd080b8c4f..7bbfb3deea30 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -1238,7 +1238,7 @@ ctnetlink_change_helper(struct nf_conn *ct, const struct nlattr * const cda[])
 	if (help) {
 		if (help->helper == helper) {
 			/* update private helper data if allowed. */
-			if (helper->from_nlattr && helpinfo)
+			if (helper->from_nlattr)
 				helper->from_nlattr(helpinfo, ct);
 			return 0;
 		} else
@@ -1467,7 +1467,7 @@ ctnetlink_create_conntrack(struct net *net, u16 zone,
 				goto err2;
 			}
 			/* set private helper data if allowed. */
-			if (helper->from_nlattr && helpinfo)
+			if (helper->from_nlattr)
 				helper->from_nlattr(helpinfo, ct);
 
 			/* not in hash table yet so not strictly necessary */
diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c
index 3678073360a3..945950a8b1f1 100644
--- a/net/netfilter/nfnetlink_cthelper.c
+++ b/net/netfilter/nfnetlink_cthelper.c
@@ -85,6 +85,9 @@ nfnl_cthelper_from_nlattr(struct nlattr *attr, struct nf_conn *ct)
 {
 	const struct nf_conn_help *help = nfct_help(ct);
 
+	if (attr == NULL)
+		return -EINVAL;
+
 	if (help->helper->data_len == 0)
 		return -EINVAL;
 
-- 
cgit v1.2.3


From 6ee584be3ee30f72dec8a8ca87bc10824e27a631 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Mon, 24 Sep 2012 14:52:12 +0200
Subject: netfilter: nfnetlink_queue: add NFQA_CAP_LEN attribute

This patch adds the NFQA_CAP_LEN attribute that allows us to know
what is the real packet size from user-space (even if we decided
to retrieve just a few bytes from the packet instead of all of it).

Security software that inspects packets should always check for
this new attribute to make sure that it is inspecting the entire
packet.

This also helps to provide a workaround for the problem described
in: http://marc.info/?l=netfilter-devel&m=134519473212536&w=2

Original idea from Florian Westphal.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/nfnetlink_queue.h | 1 +
 net/netfilter/nfnetlink_queue_core.c      | 9 +++++++--
 2 files changed, 8 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/netfilter/nfnetlink_queue.h b/include/linux/netfilter/nfnetlink_queue.h
index 3b1c1360aedf..70ec8c2bc11a 100644
--- a/include/linux/netfilter/nfnetlink_queue.h
+++ b/include/linux/netfilter/nfnetlink_queue.h
@@ -44,6 +44,7 @@ enum nfqnl_attr_type {
 	NFQA_PAYLOAD,			/* opaque data payload */
 	NFQA_CT,			/* nf_conntrack_netlink.h */
 	NFQA_CT_INFO,			/* enum ip_conntrack_info */
+	NFQA_CAP_LEN,			/* __u32 length of captured packet */
 
 	__NFQA_MAX
 };
diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c
index 3e4ddcb7e781..e12d44e75b21 100644
--- a/net/netfilter/nfnetlink_queue_core.c
+++ b/net/netfilter/nfnetlink_queue_core.c
@@ -225,7 +225,7 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
 {
 	sk_buff_data_t old_tail;
 	size_t size;
-	size_t data_len = 0;
+	size_t data_len = 0, cap_len = 0;
 	struct sk_buff *skb;
 	struct nlattr *nla;
 	struct nfqnl_msg_packet_hdr *pmsg;
@@ -247,7 +247,8 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
 #endif
 		+ nla_total_size(sizeof(u_int32_t))	/* mark */
 		+ nla_total_size(sizeof(struct nfqnl_msg_packet_hw))
-		+ nla_total_size(sizeof(struct nfqnl_msg_packet_timestamp));
+		+ nla_total_size(sizeof(struct nfqnl_msg_packet_timestamp)
+		+ nla_total_size(sizeof(u_int32_t)));	/* cap_len */
 
 	outdev = entry->outdev;
 
@@ -266,6 +267,7 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
 			data_len = entskb->len;
 
 		size += nla_total_size(data_len);
+		cap_len = entskb->len;
 		break;
 	}
 
@@ -402,6 +404,9 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
 	if (ct && nfqnl_ct_put(skb, ct, ctinfo) < 0)
 		goto nla_put_failure;
 
+	if (cap_len > 0 && nla_put_be32(skb, NFQA_CAP_LEN, htonl(cap_len)))
+		goto nla_put_failure;
+
 	nlh->nlmsg_len = skb->tail - old_tail;
 	return skb;
 
-- 
cgit v1.2.3


From 2a6c8c7998f95b140f3d3c7ac5dce2fbd6d403e3 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Mon, 24 Sep 2012 15:52:33 -0400
Subject: net: Remove unnecessary NULL check in scm_destroy().

All callers provide a non-NULL scm argument.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/scm.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/scm.h b/include/net/scm.h
index 456695f5cbc4..975cca01048b 100644
--- a/include/net/scm.h
+++ b/include/net/scm.h
@@ -73,7 +73,7 @@ static __inline__ void scm_destroy_cred(struct scm_cookie *scm)
 static __inline__ void scm_destroy(struct scm_cookie *scm)
 {
 	scm_destroy_cred(scm);
-	if (scm && scm->fp)
+	if (scm->fp)
 		__scm_destroy(scm);
 }
 
-- 
cgit v1.2.3


From 5640f7685831e088fe6c2e1f863a6805962f8e81 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sun, 23 Sep 2012 23:04:42 +0000
Subject: net: use a per task frag allocator

We currently use a per socket order-0 page cache for tcp_sendmsg()
operations.

This page is used to build fragments for skbs.

Its done to increase probability of coalescing small write() into
single segments in skbs still in write queue (not yet sent)

But it wastes a lot of memory for applications handling many mostly
idle sockets, since each socket holds one page in sk->sk_sndmsg_page

Its also quite inefficient to build TSO 64KB packets, because we need
about 16 pages per skb on arches where PAGE_SIZE = 4096, so we hit
page allocator more than wanted.

This patch adds a per task frag allocator and uses bigger pages,
if available. An automatic fallback is done in case of memory pressure.

(up to 32768 bytes per frag, thats order-3 pages on x86)

This increases TCP stream performance by 20% on loopback device,
but also benefits on other network devices, since 8x less frags are
mapped on transmit and unmapped on tx completion. Alexander Duyck
mentioned a probable performance win on systems with IOMMU enabled.

Its possible some SG enabled hardware cant cope with bigger fragments,
but their ndo_start_xmit() should already handle this, splitting a
fragment in sub fragments, since some arches have PAGE_SIZE=65536

Successfully tested on various ethernet devices.
(ixgbe, igb, bnx2x, tg3, mellanox mlx4)

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Ben Hutchings <bhutchings@solarflare.com>
Cc: Vijay Subramanian <subramanian.vijay@gmail.com>
Cc: Alexander Duyck <alexander.h.duyck@intel.com>
Tested-by: Vijay Subramanian <subramanian.vijay@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/sched.h   |  3 ++
 include/net/inet_sock.h |  4 +--
 include/net/sock.h      | 27 +++++++++--------
 kernel/exit.c           |  3 ++
 kernel/fork.c           |  1 +
 net/core/skbuff.c       | 37 ++++++-----------------
 net/core/sock.c         | 49 ++++++++++++++++++++++++++++--
 net/ipv4/ip_output.c    | 70 ++++++++++++++++++-------------------------
 net/ipv4/raw.c          | 19 +++++++-----
 net/ipv4/tcp.c          | 79 ++++++++++++++-----------------------------------
 net/ipv4/tcp_ipv4.c     |  8 -----
 net/ipv6/ip6_output.c   | 65 ++++++++++++++++------------------------
 net/sched/em_meta.c     |  2 +-
 13 files changed, 167 insertions(+), 200 deletions(-)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index b8c86648a2f9..a8e2413f6bc3 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1530,6 +1530,9 @@ struct task_struct {
 	 * cache last used pipe for splice
 	 */
 	struct pipe_inode_info *splice_pipe;
+
+	struct page_frag task_frag;
+
 #ifdef	CONFIG_TASK_DELAY_ACCT
 	struct task_delay_info *delays;
 #endif
diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index 613cfa401672..256c1ed2d69a 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -101,10 +101,8 @@ struct inet_cork {
 	__be32			addr;
 	struct ip_options	*opt;
 	unsigned int		fragsize;
-	struct dst_entry	*dst;
 	int			length; /* Total length of all frames */
-	struct page		*page;
-	u32			off;
+	struct dst_entry	*dst;
 	u8			tx_flags;
 };
 
diff --git a/include/net/sock.h b/include/net/sock.h
index 84bdaeca1314..f036493b9a61 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -247,8 +247,7 @@ struct cg_proto;
   *	@sk_stamp: time stamp of last packet received
   *	@sk_socket: Identd and reporting IO signals
   *	@sk_user_data: RPC layer private data
-  *	@sk_sndmsg_page: cached page for sendmsg
-  *	@sk_sndmsg_off: cached offset for sendmsg
+  *	@sk_frag: cached page frag
   *	@sk_peek_off: current peek_offset value
   *	@sk_send_head: front of stuff to transmit
   *	@sk_security: used by security modules
@@ -362,9 +361,8 @@ struct sock {
 	ktime_t			sk_stamp;
 	struct socket		*sk_socket;
 	void			*sk_user_data;
-	struct page		*sk_sndmsg_page;
+	struct page_frag	sk_frag;
 	struct sk_buff		*sk_send_head;
-	__u32			sk_sndmsg_off;
 	__s32			sk_peek_off;
 	int			sk_write_pending;
 #ifdef CONFIG_SECURITY
@@ -2034,18 +2032,23 @@ static inline void sk_stream_moderate_sndbuf(struct sock *sk)
 
 struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp);
 
-static inline struct page *sk_stream_alloc_page(struct sock *sk)
+/**
+ * sk_page_frag - return an appropriate page_frag
+ * @sk: socket
+ *
+ * If socket allocation mode allows current thread to sleep, it means its
+ * safe to use the per task page_frag instead of the per socket one.
+ */
+static inline struct page_frag *sk_page_frag(struct sock *sk)
 {
-	struct page *page = NULL;
+	if (sk->sk_allocation & __GFP_WAIT)
+		return &current->task_frag;
 
-	page = alloc_pages(sk->sk_allocation, 0);
-	if (!page) {
-		sk_enter_memory_pressure(sk);
-		sk_stream_moderate_sndbuf(sk);
-	}
-	return page;
+	return &sk->sk_frag;
 }
 
+extern bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag);
+
 /*
  *	Default write policy as shown to user space via poll/select/SIGIO
  */
diff --git a/kernel/exit.c b/kernel/exit.c
index f65345f9e5bb..42f25952edd9 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -1046,6 +1046,9 @@ void do_exit(long code)
 	if (tsk->splice_pipe)
 		__free_pipe_info(tsk->splice_pipe);
 
+	if (tsk->task_frag.page)
+		put_page(tsk->task_frag.page);
+
 	validate_creds_for_do_exit(tsk);
 
 	preempt_disable();
diff --git a/kernel/fork.c b/kernel/fork.c
index 2c8857e12855..01565b9ce0f3 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -330,6 +330,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
 	tsk->btrace_seq = 0;
 #endif
 	tsk->splice_pipe = NULL;
+	tsk->task_frag.page = NULL;
 
 	account_kernel_stack(ti, 1);
 
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index fe00d1208167..2ede3cfa8ffa 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -1655,38 +1655,19 @@ static struct page *linear_to_page(struct page *page, unsigned int *len,
 				   unsigned int *offset,
 				   struct sk_buff *skb, struct sock *sk)
 {
-	struct page *p = sk->sk_sndmsg_page;
-	unsigned int off;
+	struct page_frag *pfrag = sk_page_frag(sk);
 
-	if (!p) {
-new_page:
-		p = sk->sk_sndmsg_page = alloc_pages(sk->sk_allocation, 0);
-		if (!p)
-			return NULL;
-
-		off = sk->sk_sndmsg_off = 0;
-		/* hold one ref to this page until it's full */
-	} else {
-		unsigned int mlen;
-
-		/* If we are the only user of the page, we can reset offset */
-		if (page_count(p) == 1)
-			sk->sk_sndmsg_off = 0;
-		off = sk->sk_sndmsg_off;
-		mlen = PAGE_SIZE - off;
-		if (mlen < 64 && mlen < *len) {
-			put_page(p);
-			goto new_page;
-		}
+	if (!sk_page_frag_refill(sk, pfrag))
+		return NULL;
 
-		*len = min_t(unsigned int, *len, mlen);
-	}
+	*len = min_t(unsigned int, *len, pfrag->size - pfrag->offset);
 
-	memcpy(page_address(p) + off, page_address(page) + *offset, *len);
-	sk->sk_sndmsg_off += *len;
-	*offset = off;
+	memcpy(page_address(pfrag->page) + pfrag->offset,
+	       page_address(page) + *offset, *len);
+	*offset = pfrag->offset;
+	pfrag->offset += *len;
 
-	return p;
+	return pfrag->page;
 }
 
 static bool spd_can_coalesce(const struct splice_pipe_desc *spd,
diff --git a/net/core/sock.c b/net/core/sock.c
index 2693f7649222..727114cd6f7e 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1744,6 +1744,45 @@ struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size,
 }
 EXPORT_SYMBOL(sock_alloc_send_skb);
 
+/* On 32bit arches, an skb frag is limited to 2^15 */
+#define SKB_FRAG_PAGE_ORDER	get_order(32768)
+
+bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag)
+{
+	int order;
+
+	if (pfrag->page) {
+		if (atomic_read(&pfrag->page->_count) == 1) {
+			pfrag->offset = 0;
+			return true;
+		}
+		if (pfrag->offset < pfrag->size)
+			return true;
+		put_page(pfrag->page);
+	}
+
+	/* We restrict high order allocations to users that can afford to wait */
+	order = (sk->sk_allocation & __GFP_WAIT) ? SKB_FRAG_PAGE_ORDER : 0;
+
+	do {
+		gfp_t gfp = sk->sk_allocation;
+
+		if (order)
+			gfp |= __GFP_COMP | __GFP_NOWARN;
+		pfrag->page = alloc_pages(gfp, order);
+		if (likely(pfrag->page)) {
+			pfrag->offset = 0;
+			pfrag->size = PAGE_SIZE << order;
+			return true;
+		}
+	} while (--order >= 0);
+
+	sk_enter_memory_pressure(sk);
+	sk_stream_moderate_sndbuf(sk);
+	return false;
+}
+EXPORT_SYMBOL(sk_page_frag_refill);
+
 static void __lock_sock(struct sock *sk)
 	__releases(&sk->sk_lock.slock)
 	__acquires(&sk->sk_lock.slock)
@@ -2173,8 +2212,8 @@ void sock_init_data(struct socket *sock, struct sock *sk)
 	sk->sk_error_report	=	sock_def_error_report;
 	sk->sk_destruct		=	sock_def_destruct;
 
-	sk->sk_sndmsg_page	=	NULL;
-	sk->sk_sndmsg_off	=	0;
+	sk->sk_frag.page	=	NULL;
+	sk->sk_frag.offset	=	0;
 	sk->sk_peek_off		=	-1;
 
 	sk->sk_peer_pid 	=	NULL;
@@ -2417,6 +2456,12 @@ void sk_common_release(struct sock *sk)
 	xfrm_sk_free_policy(sk);
 
 	sk_refcnt_debug_release(sk);
+
+	if (sk->sk_frag.page) {
+		put_page(sk->sk_frag.page);
+		sk->sk_frag.page = NULL;
+	}
+
 	sock_put(sk);
 }
 EXPORT_SYMBOL(sk_common_release);
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index a5beab1dc958..24a29a39e9a8 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -793,6 +793,7 @@ static int __ip_append_data(struct sock *sk,
 			    struct flowi4 *fl4,
 			    struct sk_buff_head *queue,
 			    struct inet_cork *cork,
+			    struct page_frag *pfrag,
 			    int getfrag(void *from, char *to, int offset,
 					int len, int odd, struct sk_buff *skb),
 			    void *from, int length, int transhdrlen,
@@ -987,47 +988,30 @@ alloc_new_skb:
 			}
 		} else {
 			int i = skb_shinfo(skb)->nr_frags;
-			skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
-			struct page *page = cork->page;
-			int off = cork->off;
-			unsigned int left;
-
-			if (page && (left = PAGE_SIZE - off) > 0) {
-				if (copy >= left)
-					copy = left;
-				if (page != skb_frag_page(frag)) {
-					if (i == MAX_SKB_FRAGS) {
-						err = -EMSGSIZE;
-						goto error;
-					}
-					skb_fill_page_desc(skb, i, page, off, 0);
-					skb_frag_ref(skb, i);
-					frag = &skb_shinfo(skb)->frags[i];
-				}
-			} else if (i < MAX_SKB_FRAGS) {
-				if (copy > PAGE_SIZE)
-					copy = PAGE_SIZE;
-				page = alloc_pages(sk->sk_allocation, 0);
-				if (page == NULL)  {
-					err = -ENOMEM;
-					goto error;
-				}
-				cork->page = page;
-				cork->off = 0;
 
-				skb_fill_page_desc(skb, i, page, 0, 0);
-				frag = &skb_shinfo(skb)->frags[i];
-			} else {
-				err = -EMSGSIZE;
-				goto error;
-			}
-			if (getfrag(from, skb_frag_address(frag)+skb_frag_size(frag),
-				    offset, copy, skb->len, skb) < 0) {
-				err = -EFAULT;
+			err = -ENOMEM;
+			if (!sk_page_frag_refill(sk, pfrag))
 				goto error;
+
+			if (!skb_can_coalesce(skb, i, pfrag->page,
+					      pfrag->offset)) {
+				err = -EMSGSIZE;
+				if (i == MAX_SKB_FRAGS)
+					goto error;
+
+				__skb_fill_page_desc(skb, i, pfrag->page,
+						     pfrag->offset, 0);
+				skb_shinfo(skb)->nr_frags = ++i;
+				get_page(pfrag->page);
 			}
-			cork->off += copy;
-			skb_frag_size_add(frag, copy);
+			copy = min_t(int, copy, pfrag->size - pfrag->offset);
+			if (getfrag(from,
+				    page_address(pfrag->page) + pfrag->offset,
+				    offset, copy, skb->len, skb) < 0)
+				goto error_efault;
+
+			pfrag->offset += copy;
+			skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
 			skb->len += copy;
 			skb->data_len += copy;
 			skb->truesize += copy;
@@ -1039,6 +1023,8 @@ alloc_new_skb:
 
 	return 0;
 
+error_efault:
+	err = -EFAULT;
 error:
 	cork->length -= length;
 	IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTDISCARDS);
@@ -1079,8 +1065,6 @@ static int ip_setup_cork(struct sock *sk, struct inet_cork *cork,
 	cork->dst = &rt->dst;
 	cork->length = 0;
 	cork->tx_flags = ipc->tx_flags;
-	cork->page = NULL;
-	cork->off = 0;
 
 	return 0;
 }
@@ -1117,7 +1101,8 @@ int ip_append_data(struct sock *sk, struct flowi4 *fl4,
 		transhdrlen = 0;
 	}
 
-	return __ip_append_data(sk, fl4, &sk->sk_write_queue, &inet->cork.base, getfrag,
+	return __ip_append_data(sk, fl4, &sk->sk_write_queue, &inet->cork.base,
+				sk_page_frag(sk), getfrag,
 				from, length, transhdrlen, flags);
 }
 
@@ -1439,7 +1424,8 @@ struct sk_buff *ip_make_skb(struct sock *sk,
 	if (err)
 		return ERR_PTR(err);
 
-	err = __ip_append_data(sk, fl4, &queue, &cork, getfrag,
+	err = __ip_append_data(sk, fl4, &queue, &cork,
+			       &current->task_frag, getfrag,
 			       from, length, transhdrlen, flags);
 	if (err) {
 		__ip_flush_pending_frames(sk, &queue, &cork);
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index f2425785d40a..a80740ba4248 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -131,18 +131,23 @@ found:
  *	0 - deliver
  *	1 - block
  */
-static __inline__ int icmp_filter(struct sock *sk, struct sk_buff *skb)
+static int icmp_filter(const struct sock *sk, const struct sk_buff *skb)
 {
-	int type;
-
-	if (!pskb_may_pull(skb, sizeof(struct icmphdr)))
+	struct icmphdr _hdr;
+	const struct icmphdr *hdr;
+
+	pr_err("icmp_filter skb_transport_offset %d data-head %ld len %d/%d\n",
+		skb_transport_offset(skb), skb->data - skb->head, skb->len, skb->data_len);
+	hdr = skb_header_pointer(skb, skb_transport_offset(skb),
+				 sizeof(_hdr), &_hdr);
+	pr_err("head %p data %p hdr %p type %d\n", skb->head, skb->data, hdr, hdr ? hdr->type : -1);
+	if (!hdr)
 		return 1;
 
-	type = icmp_hdr(skb)->type;
-	if (type < 32) {
+	if (hdr->type < 32) {
 		__u32 data = raw_sk(sk)->filter.data;
 
-		return ((1 << type) & data) != 0;
+		return ((1U << hdr->type) & data) != 0;
 	}
 
 	/* Do not block unknown ICMP types */
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 7b1e940393cf..72ea4752f21b 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1150,78 +1150,43 @@ new_segment:
 				if (err)
 					goto do_fault;
 			} else {
-				bool merge = false;
+				bool merge = true;
 				int i = skb_shinfo(skb)->nr_frags;
-				struct page *page = sk->sk_sndmsg_page;
-				int off;
-
-				if (page && page_count(page) == 1)
-					sk->sk_sndmsg_off = 0;
-
-				off = sk->sk_sndmsg_off;
-
-				if (skb_can_coalesce(skb, i, page, off) &&
-				    off != PAGE_SIZE) {
-					/* We can extend the last page
-					 * fragment. */
-					merge = true;
-				} else if (i == MAX_SKB_FRAGS || !sg) {
-					/* Need to add new fragment and cannot
-					 * do this because interface is non-SG,
-					 * or because all the page slots are
-					 * busy. */
-					tcp_mark_push(tp, skb);
-					goto new_segment;
-				} else if (page) {
-					if (off == PAGE_SIZE) {
-						put_page(page);
-						sk->sk_sndmsg_page = page = NULL;
-						off = 0;
+				struct page_frag *pfrag = sk_page_frag(sk);
+
+				if (!sk_page_frag_refill(sk, pfrag))
+					goto wait_for_memory;
+
+				if (!skb_can_coalesce(skb, i, pfrag->page,
+						      pfrag->offset)) {
+					if (i == MAX_SKB_FRAGS || !sg) {
+						tcp_mark_push(tp, skb);
+						goto new_segment;
 					}
-				} else
-					off = 0;
+					merge = false;
+				}
 
-				if (copy > PAGE_SIZE - off)
-					copy = PAGE_SIZE - off;
+				copy = min_t(int, copy, pfrag->size - pfrag->offset);
 
 				if (!sk_wmem_schedule(sk, copy))
 					goto wait_for_memory;
 
-				if (!page) {
-					/* Allocate new cache page. */
-					if (!(page = sk_stream_alloc_page(sk)))
-						goto wait_for_memory;
-				}
-
-				/* Time to copy data. We are close to
-				 * the end! */
 				err = skb_copy_to_page_nocache(sk, from, skb,
-							       page, off, copy);
-				if (err) {
-					/* If this page was new, give it to the
-					 * socket so it does not get leaked.
-					 */
-					if (!sk->sk_sndmsg_page) {
-						sk->sk_sndmsg_page = page;
-						sk->sk_sndmsg_off = 0;
-					}
+							       pfrag->page,
+							       pfrag->offset,
+							       copy);
+				if (err)
 					goto do_error;
-				}
 
 				/* Update the skb. */
 				if (merge) {
 					skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
 				} else {
-					skb_fill_page_desc(skb, i, page, off, copy);
-					if (sk->sk_sndmsg_page) {
-						get_page(page);
-					} else if (off + copy < PAGE_SIZE) {
-						get_page(page);
-						sk->sk_sndmsg_page = page;
-					}
+					skb_fill_page_desc(skb, i, pfrag->page,
+							   pfrag->offset, copy);
+					get_page(pfrag->page);
 				}
-
-				sk->sk_sndmsg_off = off + copy;
+				pfrag->offset += copy;
 			}
 
 			if (!copied)
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 0a7e020f16b5..93406c583f43 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2200,14 +2200,6 @@ void tcp_v4_destroy_sock(struct sock *sk)
 	if (inet_csk(sk)->icsk_bind_hash)
 		inet_put_port(sk);
 
-	/*
-	 * If sendmsg cached page exists, toss it.
-	 */
-	if (sk->sk_sndmsg_page) {
-		__free_page(sk->sk_sndmsg_page);
-		sk->sk_sndmsg_page = NULL;
-	}
-
 	/* TCP Cookie Transactions */
 	if (tp->cookie_values != NULL) {
 		kref_put(&tp->cookie_values->kref,
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 3dd4a37488d5..aece3e792f84 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1279,8 +1279,6 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
 		if (dst_allfrag(rt->dst.path))
 			cork->flags |= IPCORK_ALLFRAG;
 		cork->length = 0;
-		sk->sk_sndmsg_page = NULL;
-		sk->sk_sndmsg_off = 0;
 		exthdrlen = (opt ? opt->opt_flen : 0) - rt->rt6i_nfheader_len;
 		length += exthdrlen;
 		transhdrlen += exthdrlen;
@@ -1504,48 +1502,31 @@ alloc_new_skb:
 			}
 		} else {
 			int i = skb_shinfo(skb)->nr_frags;
-			skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
-			struct page *page = sk->sk_sndmsg_page;
-			int off = sk->sk_sndmsg_off;
-			unsigned int left;
-
-			if (page && (left = PAGE_SIZE - off) > 0) {
-				if (copy >= left)
-					copy = left;
-				if (page != skb_frag_page(frag)) {
-					if (i == MAX_SKB_FRAGS) {
-						err = -EMSGSIZE;
-						goto error;
-					}
-					skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
-					skb_frag_ref(skb, i);
-					frag = &skb_shinfo(skb)->frags[i];
-				}
-			} else if(i < MAX_SKB_FRAGS) {
-				if (copy > PAGE_SIZE)
-					copy = PAGE_SIZE;
-				page = alloc_pages(sk->sk_allocation, 0);
-				if (page == NULL) {
-					err = -ENOMEM;
-					goto error;
-				}
-				sk->sk_sndmsg_page = page;
-				sk->sk_sndmsg_off = 0;
+			struct page_frag *pfrag = sk_page_frag(sk);
 
-				skb_fill_page_desc(skb, i, page, 0, 0);
-				frag = &skb_shinfo(skb)->frags[i];
-			} else {
-				err = -EMSGSIZE;
+			err = -ENOMEM;
+			if (!sk_page_frag_refill(sk, pfrag))
 				goto error;
+
+			if (!skb_can_coalesce(skb, i, pfrag->page,
+					      pfrag->offset)) {
+				err = -EMSGSIZE;
+				if (i == MAX_SKB_FRAGS)
+					goto error;
+
+				__skb_fill_page_desc(skb, i, pfrag->page,
+						     pfrag->offset, 0);
+				skb_shinfo(skb)->nr_frags = ++i;
+				get_page(pfrag->page);
 			}
+			copy = min_t(int, copy, pfrag->size - pfrag->offset);
 			if (getfrag(from,
-				    skb_frag_address(frag) + skb_frag_size(frag),
-				    offset, copy, skb->len, skb) < 0) {
-				err = -EFAULT;
-				goto error;
-			}
-			sk->sk_sndmsg_off += copy;
-			skb_frag_size_add(frag, copy);
+				    page_address(pfrag->page) + pfrag->offset,
+				    offset, copy, skb->len, skb) < 0)
+				goto error_efault;
+
+			pfrag->offset += copy;
+			skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
 			skb->len += copy;
 			skb->data_len += copy;
 			skb->truesize += copy;
@@ -1554,7 +1535,11 @@ alloc_new_skb:
 		offset += copy;
 		length -= copy;
 	}
+
 	return 0;
+
+error_efault:
+	err = -EFAULT;
 error:
 	cork->length -= length;
 	IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c
index 4ab6e3325573..7c3de6ffa516 100644
--- a/net/sched/em_meta.c
+++ b/net/sched/em_meta.c
@@ -461,7 +461,7 @@ META_COLLECTOR(int_sk_sndtimeo)
 META_COLLECTOR(int_sk_sendmsg_off)
 {
 	SKIP_NONLOCAL(skb);
-	dst->value = skb->sk->sk_sndmsg_off;
+	dst->value = skb->sk->sk_frag.offset;
 }
 
 META_COLLECTOR(int_sk_write_pend)
-- 
cgit v1.2.3


From 9e49e88958feb41ec701fa34b44723dabadbc28c Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <dxchgb@gmail.com>
Date: Mon, 24 Sep 2012 02:23:59 +0000
Subject: filter: add XOR instruction for use with X/K

SKF_AD_ALU_XOR_X has been added a while ago, but as an 'ancillary'
operation that is invoked through a negative offset in K within BPF
load operations. Since BPF_MOD has recently been added, BPF_XOR should
also be part of the common ALU operations. Removing SKF_AD_ALU_XOR_X
might not be an option since this is exposed to user space.

Signed-off-by: Daniel Borkmann <daniel.borkmann@tik.ee.ethz.ch>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/filter.h |  3 +++
 net/core/filter.c      | 12 +++++++++---
 2 files changed, 12 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/filter.h b/include/linux/filter.h
index 3cf5fd561d86..2ded090e10f4 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -75,6 +75,7 @@ struct sock_fprog {	/* Required for SO_ATTACH_FILTER. */
 #define         BPF_RSH         0x70
 #define         BPF_NEG         0x80
 #define		BPF_MOD		0x90
+#define		BPF_XOR		0xa0
 
 #define         BPF_JA          0x00
 #define         BPF_JEQ         0x10
@@ -204,6 +205,8 @@ enum {
 	BPF_S_ALU_AND_X,
 	BPF_S_ALU_OR_K,
 	BPF_S_ALU_OR_X,
+	BPF_S_ALU_XOR_K,
+	BPF_S_ALU_XOR_X,
 	BPF_S_ALU_LSH_K,
 	BPF_S_ALU_LSH_X,
 	BPF_S_ALU_RSH_K,
diff --git a/net/core/filter.c b/net/core/filter.c
index fbe3a8d12570..3d92ebb7fbcf 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -187,6 +187,13 @@ unsigned int sk_run_filter(const struct sk_buff *skb,
 		case BPF_S_ALU_OR_K:
 			A |= K;
 			continue;
+		case BPF_S_ANC_ALU_XOR_X:
+		case BPF_S_ALU_XOR_X:
+			A ^= X;
+			continue;
+		case BPF_S_ALU_XOR_K:
+			A ^= K;
+			continue;
 		case BPF_S_ALU_LSH_X:
 			A <<= X;
 			continue;
@@ -334,9 +341,6 @@ load_b:
 		case BPF_S_ANC_CPU:
 			A = raw_smp_processor_id();
 			continue;
-		case BPF_S_ANC_ALU_XOR_X:
-			A ^= X;
-			continue;
 		case BPF_S_ANC_NLATTR: {
 			struct nlattr *nla;
 
@@ -483,6 +487,8 @@ int sk_chk_filter(struct sock_filter *filter, unsigned int flen)
 		[BPF_ALU|BPF_AND|BPF_X]  = BPF_S_ALU_AND_X,
 		[BPF_ALU|BPF_OR|BPF_K]   = BPF_S_ALU_OR_K,
 		[BPF_ALU|BPF_OR|BPF_X]   = BPF_S_ALU_OR_X,
+		[BPF_ALU|BPF_XOR|BPF_K]  = BPF_S_ALU_XOR_K,
+		[BPF_ALU|BPF_XOR|BPF_X]  = BPF_S_ALU_XOR_X,
 		[BPF_ALU|BPF_LSH|BPF_K]  = BPF_S_ALU_LSH_K,
 		[BPF_ALU|BPF_LSH|BPF_X]  = BPF_S_ALU_LSH_X,
 		[BPF_ALU|BPF_RSH|BPF_K]  = BPF_S_ALU_RSH_K,
-- 
cgit v1.2.3


From 7e0352306f6869b442a574a8e691f126c9fe930a Mon Sep 17 00:00:00 2001
From: Ilan Elias <ilane@ti.com>
Date: Wed, 15 Aug 2012 11:46:22 +0300
Subject: NFC: Set local general bytes in nci_start_poll

If initiator protocol is NFC-DEP, set the local general bytes
in nci_start_poll.

Signed-off-by: Ilan Elias <ilane@ti.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 include/net/nfc/nci.h      | 23 +++++++++++++++++++
 include/net/nfc/nci_core.h |  1 +
 net/nfc/nci/core.c         | 55 ++++++++++++++++++++++++++++++++++++++++++++++
 net/nfc/nci/rsp.c          | 14 ++++++++++++
 4 files changed, 93 insertions(+)

(limited to 'include')

diff --git a/include/net/nfc/nci.h b/include/net/nfc/nci.h
index 276094b91d7c..d260ef3629db 100644
--- a/include/net/nfc/nci.h
+++ b/include/net/nfc/nci.h
@@ -32,6 +32,7 @@
 #define NCI_MAX_NUM_MAPPING_CONFIGS				10
 #define NCI_MAX_NUM_RF_CONFIGS					10
 #define NCI_MAX_NUM_CONN					10
+#define NCI_MAX_PARAM_LEN					251
 
 /* NCI Status Codes */
 #define NCI_STATUS_OK						0x00
@@ -102,6 +103,9 @@
 #define NCI_RF_INTERFACE_ISO_DEP				0x02
 #define NCI_RF_INTERFACE_NFC_DEP				0x03
 
+/* NCI Configuration Parameter Tags */
+#define NCI_PN_ATR_REQ_GEN_BYTES				0x29
+
 /* NCI Reset types */
 #define NCI_RESET_TYPE_KEEP_CONFIG				0x00
 #define NCI_RESET_TYPE_RESET_CONFIG				0x01
@@ -188,6 +192,18 @@ struct nci_core_reset_cmd {
 
 #define NCI_OP_CORE_INIT_CMD		nci_opcode_pack(NCI_GID_CORE, 0x01)
 
+#define NCI_OP_CORE_SET_CONFIG_CMD	nci_opcode_pack(NCI_GID_CORE, 0x02)
+struct set_config_param {
+	__u8	id;
+	__u8	len;
+	__u8	val[NCI_MAX_PARAM_LEN];
+} __packed;
+
+struct nci_core_set_config_cmd {
+	__u8	num_params;
+	struct	set_config_param param; /* support 1 param per cmd is enough */
+} __packed;
+
 #define NCI_OP_RF_DISCOVER_MAP_CMD	nci_opcode_pack(NCI_GID_RF_MGMT, 0x00)
 struct disc_map_config {
 	__u8	rf_protocol;
@@ -252,6 +268,13 @@ struct nci_core_init_rsp_2 {
 	__le32	manufact_specific_info;
 } __packed;
 
+#define NCI_OP_CORE_SET_CONFIG_RSP	nci_opcode_pack(NCI_GID_CORE, 0x02)
+struct nci_core_set_config_rsp {
+	__u8	status;
+	__u8	num_params;
+	__u8	params_id[0];	/* variable size array */
+} __packed;
+
 #define NCI_OP_RF_DISCOVER_MAP_RSP	nci_opcode_pack(NCI_GID_RF_MGMT, 0x00)
 
 #define NCI_OP_RF_DISCOVER_RSP		nci_opcode_pack(NCI_GID_RF_MGMT, 0x03)
diff --git a/include/net/nfc/nci_core.h b/include/net/nfc/nci_core.h
index feba74027ff8..f98674d7baf1 100644
--- a/include/net/nfc/nci_core.h
+++ b/include/net/nfc/nci_core.h
@@ -54,6 +54,7 @@ enum nci_state {
 /* NCI timeouts */
 #define NCI_RESET_TIMEOUT			5000
 #define NCI_INIT_TIMEOUT			5000
+#define NCI_SET_CONFIG_TIMEOUT			5000
 #define NCI_RF_DISC_TIMEOUT			5000
 #define NCI_RF_DISC_SELECT_TIMEOUT		5000
 #define NCI_RF_DEACTIVATE_TIMEOUT		30000
diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c
index f81efe13985a..f017b781667a 100644
--- a/net/nfc/nci/core.c
+++ b/net/nfc/nci/core.c
@@ -176,6 +176,27 @@ static void nci_init_complete_req(struct nci_dev *ndev, unsigned long opt)
 		     (1 + ((*num) * sizeof(struct disc_map_config))), &cmd);
 }
 
+struct nci_set_config_param {
+	__u8	id;
+	size_t	len;
+	__u8	*val;
+};
+
+static void nci_set_config_req(struct nci_dev *ndev, unsigned long opt)
+{
+	struct nci_set_config_param *param = (struct nci_set_config_param *)opt;
+	struct nci_core_set_config_cmd cmd;
+
+	BUG_ON(param->len > NCI_MAX_PARAM_LEN);
+
+	cmd.num_params = 1;
+	cmd.param.id = param->id;
+	cmd.param.len = param->len;
+	memcpy(cmd.param.val, param->val, param->len);
+
+	nci_send_cmd(ndev, NCI_OP_CORE_SET_CONFIG_CMD, (3 + param->len), &cmd);
+}
+
 static void nci_rf_discover_req(struct nci_dev *ndev, unsigned long opt)
 {
 	struct nci_rf_disc_cmd cmd;
@@ -388,6 +409,32 @@ static int nci_dev_down(struct nfc_dev *nfc_dev)
 	return nci_close_device(ndev);
 }
 
+static int nci_set_local_general_bytes(struct nfc_dev *nfc_dev)
+{
+	struct nci_dev *ndev = nfc_get_drvdata(nfc_dev);
+	struct nci_set_config_param param;
+	__u8 local_gb[NFC_MAX_GT_LEN];
+	int i, rc = 0;
+
+	param.val = nfc_get_local_general_bytes(nfc_dev, &param.len);
+	if ((param.val == NULL) || (param.len == 0))
+		return rc;
+
+	if (param.len > NCI_MAX_PARAM_LEN)
+		return -EINVAL;
+
+	for (i = 0; i < param.len; i++)
+		local_gb[param.len-1-i] = param.val[i];
+
+	param.id = NCI_PN_ATR_REQ_GEN_BYTES;
+	param.val = local_gb;
+
+	rc = nci_request(ndev, nci_set_config_req, (unsigned long)&param,
+			 msecs_to_jiffies(NCI_SET_CONFIG_TIMEOUT));
+
+	return rc;
+}
+
 static int nci_start_poll(struct nfc_dev *nfc_dev,
 			  __u32 im_protocols, __u32 tm_protocols)
 {
@@ -415,6 +462,14 @@ static int nci_start_poll(struct nfc_dev *nfc_dev,
 			return -EBUSY;
 	}
 
+	if (im_protocols & NFC_PROTO_NFC_DEP_MASK) {
+		rc = nci_set_local_general_bytes(nfc_dev);
+		if (rc) {
+			pr_err("failed to set local general bytes\n");
+			return rc;
+		}
+	}
+
 	rc = nci_request(ndev, nci_rf_discover_req, im_protocols,
 			 msecs_to_jiffies(NCI_RF_DISC_TIMEOUT));
 
diff --git a/net/nfc/nci/rsp.c b/net/nfc/nci/rsp.c
index 3003c3390e49..dd072f38ad00 100644
--- a/net/nfc/nci/rsp.c
+++ b/net/nfc/nci/rsp.c
@@ -119,6 +119,16 @@ exit:
 	nci_req_complete(ndev, rsp_1->status);
 }
 
+static void nci_core_set_config_rsp_packet(struct nci_dev *ndev,
+					   struct sk_buff *skb)
+{
+	struct nci_core_set_config_rsp *rsp = (void *) skb->data;
+
+	pr_debug("status 0x%x\n", rsp->status);
+
+	nci_req_complete(ndev, rsp->status);
+}
+
 static void nci_rf_disc_map_rsp_packet(struct nci_dev *ndev,
 				       struct sk_buff *skb)
 {
@@ -194,6 +204,10 @@ void nci_rsp_packet(struct nci_dev *ndev, struct sk_buff *skb)
 		nci_core_init_rsp_packet(ndev, skb);
 		break;
 
+	case NCI_OP_CORE_SET_CONFIG_RSP:
+		nci_core_set_config_rsp_packet(ndev, skb);
+		break;
+
 	case NCI_OP_RF_DISCOVER_MAP_RSP:
 		nci_rf_disc_map_rsp_packet(ndev, skb);
 		break;
-- 
cgit v1.2.3


From ac206838403411e617dbe0e7df1891ee957f1f9a Mon Sep 17 00:00:00 2001
From: Ilan Elias <ilane@ti.com>
Date: Wed, 15 Aug 2012 11:46:23 +0300
Subject: NFC: Parse NCI NFC-DEP activation params

Signed-off-by: Ilan Elias <ilane@ti.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 include/net/nfc/nci.h |  6 ++++++
 net/nfc/nci/ntf.c     | 32 ++++++++++++++++++++++++++++++++
 2 files changed, 38 insertions(+)

(limited to 'include')

diff --git a/include/net/nfc/nci.h b/include/net/nfc/nci.h
index d260ef3629db..88785e5c6b2c 100644
--- a/include/net/nfc/nci.h
+++ b/include/net/nfc/nci.h
@@ -351,6 +351,11 @@ struct activation_params_nfcb_poll_iso_dep {
 	__u8	attrib_res[50];
 };
 
+struct activation_params_poll_nfc_dep {
+	__u8	atr_res_len;
+	__u8	atr_res[63];
+};
+
 struct nci_rf_intf_activated_ntf {
 	__u8	rf_discovery_id;
 	__u8	rf_interface;
@@ -374,6 +379,7 @@ struct nci_rf_intf_activated_ntf {
 	union {
 		struct activation_params_nfca_poll_iso_dep nfca_poll_iso_dep;
 		struct activation_params_nfcb_poll_iso_dep nfcb_poll_iso_dep;
+		struct activation_params_poll_nfc_dep poll_nfc_dep;
 	} activation_params;
 
 } __packed;
diff --git a/net/nfc/nci/ntf.c b/net/nfc/nci/ntf.c
index af7a93b04393..6e17661a41a4 100644
--- a/net/nfc/nci/ntf.c
+++ b/net/nfc/nci/ntf.c
@@ -361,6 +361,33 @@ static int nci_extract_activation_params_iso_dep(struct nci_dev *ndev,
 	return NCI_STATUS_OK;
 }
 
+static int nci_extract_activation_params_nfc_dep(struct nci_dev *ndev,
+			struct nci_rf_intf_activated_ntf *ntf, __u8 *data)
+{
+	struct activation_params_poll_nfc_dep *poll;
+	int i;
+
+	switch (ntf->activation_rf_tech_and_mode) {
+	case NCI_NFC_A_PASSIVE_POLL_MODE:
+	case NCI_NFC_F_PASSIVE_POLL_MODE:
+		poll = &ntf->activation_params.poll_nfc_dep;
+		poll->atr_res_len = min_t(__u8, *data++, 63);
+		pr_debug("atr_res_len %d\n", poll->atr_res_len);
+		if (poll->atr_res_len > 0) {
+			for (i = 0; i < poll->atr_res_len; i++)
+				poll->atr_res[poll->atr_res_len-1-i] = data[i];
+		}
+		break;
+
+	default:
+		pr_err("unsupported activation_rf_tech_and_mode 0x%x\n",
+		       ntf->activation_rf_tech_and_mode);
+		return NCI_STATUS_RF_PROTOCOL_ERROR;
+	}
+
+	return NCI_STATUS_OK;
+}
+
 static void nci_target_auto_activated(struct nci_dev *ndev,
 				      struct nci_rf_intf_activated_ntf *ntf)
 {
@@ -454,6 +481,11 @@ static void nci_rf_intf_activated_ntf_packet(struct nci_dev *ndev,
 								    &ntf, data);
 			break;
 
+		case NCI_RF_INTERFACE_NFC_DEP:
+			err = nci_extract_activation_params_nfc_dep(ndev,
+								    &ntf, data);
+			break;
+
 		case NCI_RF_INTERFACE_FRAME:
 			/* no activation params */
 			break;
-- 
cgit v1.2.3


From 767f19ae698e535f308663c48245fa951abebe20 Mon Sep 17 00:00:00 2001
From: Ilan Elias <ilane@ti.com>
Date: Wed, 15 Aug 2012 11:46:24 +0300
Subject: NFC: Implement NCI dep_link_up and dep_link_down

During NFC-DEP target activation, store the remote
general bytes to be used later in dep_link_up.
When dep_link_up is called, activate the NFC-DEP target,
and forward the remote general bytes.
When dep_link_down is called, deactivate the target.

Signed-off-by: Ilan Elias <ilane@ti.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 include/net/nfc/nci_core.h |  4 ++++
 include/net/nfc/nfc.h      |  1 +
 net/nfc/nci/core.c         | 36 +++++++++++++++++++++++++++++++++++-
 net/nfc/nci/ntf.c          | 20 ++++++++++++++++++++
 4 files changed, 60 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/nfc/nci_core.h b/include/net/nfc/nci_core.h
index f98674d7baf1..d705d8674949 100644
--- a/include/net/nfc/nci_core.h
+++ b/include/net/nfc/nci_core.h
@@ -138,6 +138,10 @@ struct nci_dev {
 	data_exchange_cb_t	data_exchange_cb;
 	void			*data_exchange_cb_context;
 	struct sk_buff		*rx_data_reassembly;
+
+	/* stored during intf_activated_ntf */
+	__u8 remote_gb[NFC_MAX_GT_LEN];
+	__u8 remote_gb_len;
 };
 
 /* ----- NCI Devices ----- */
diff --git a/include/net/nfc/nfc.h b/include/net/nfc/nfc.h
index 6431f5e39022..7b9f71fa0ea7 100644
--- a/include/net/nfc/nfc.h
+++ b/include/net/nfc/nfc.h
@@ -72,6 +72,7 @@ struct nfc_ops {
 
 #define NFC_TARGET_IDX_ANY -1
 #define NFC_MAX_GT_LEN 48
+#define NFC_ATR_RES_GT_OFFSET 15
 
 struct nfc_target {
 	u32 idx;
diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c
index f017b781667a..acf9abb7d99b 100644
--- a/net/nfc/nci/core.c
+++ b/net/nfc/nci/core.c
@@ -564,7 +564,7 @@ static void nci_deactivate_target(struct nfc_dev *nfc_dev,
 {
 	struct nci_dev *ndev = nfc_get_drvdata(nfc_dev);
 
-	pr_debug("target_idx %d\n", target->idx);
+	pr_debug("entry\n");
 
 	if (!ndev->target_active_prot) {
 		pr_err("unable to deactivate target, no active target\n");
@@ -579,6 +579,38 @@ static void nci_deactivate_target(struct nfc_dev *nfc_dev,
 	}
 }
 
+
+static int nci_dep_link_up(struct nfc_dev *nfc_dev, struct nfc_target *target,
+			   __u8 comm_mode, __u8 *gb, size_t gb_len)
+{
+	struct nci_dev *ndev = nfc_get_drvdata(nfc_dev);
+	int rc;
+
+	pr_debug("target_idx %d, comm_mode %d\n", target->idx, comm_mode);
+
+	rc = nci_activate_target(nfc_dev, target, NFC_PROTO_NFC_DEP);
+	if (rc)
+		return rc;
+
+	rc = nfc_set_remote_general_bytes(nfc_dev, ndev->remote_gb,
+					  ndev->remote_gb_len);
+	if (!rc)
+		rc = nfc_dep_link_is_up(nfc_dev, target->idx, NFC_COMM_PASSIVE,
+					NFC_RF_INITIATOR);
+
+	return rc;
+}
+
+static int nci_dep_link_down(struct nfc_dev *nfc_dev)
+{
+	pr_debug("entry\n");
+
+	nci_deactivate_target(nfc_dev, NULL);
+
+	return 0;
+}
+
+
 static int nci_transceive(struct nfc_dev *nfc_dev, struct nfc_target *target,
 			  struct sk_buff *skb,
 			  data_exchange_cb_t cb, void *cb_context)
@@ -612,6 +644,8 @@ static struct nfc_ops nci_nfc_ops = {
 	.dev_down = nci_dev_down,
 	.start_poll = nci_start_poll,
 	.stop_poll = nci_stop_poll,
+	.dep_link_up = nci_dep_link_up,
+	.dep_link_down = nci_dep_link_down,
 	.activate_target = nci_activate_target,
 	.deactivate_target = nci_deactivate_target,
 	.im_transceive = nci_transceive,
diff --git a/net/nfc/nci/ntf.c b/net/nfc/nci/ntf.c
index 6e17661a41a4..b2aa98ef0927 100644
--- a/net/nfc/nci/ntf.c
+++ b/net/nfc/nci/ntf.c
@@ -176,6 +176,8 @@ static int nci_add_new_protocol(struct nci_dev *ndev,
 			protocol = NFC_PROTO_ISO14443_B_MASK;
 	else if (rf_protocol == NCI_RF_PROTOCOL_T3T)
 		protocol = NFC_PROTO_FELICA_MASK;
+	else if (rf_protocol == NCI_RF_PROTOCOL_NFC_DEP)
+		protocol = NFC_PROTO_NFC_DEP_MASK;
 	else
 		protocol = 0;
 
@@ -505,6 +507,24 @@ exit:
 
 		/* set the available credits to initial value */
 		atomic_set(&ndev->credits_cnt, ndev->initial_num_credits);
+
+		/* store general bytes to be reported later in dep_link_up */
+		if (ntf.rf_interface == NCI_RF_INTERFACE_NFC_DEP) {
+			ndev->remote_gb_len = 0;
+
+			if (ntf.activation_params_len > 0) {
+				/* ATR_RES general bytes at offset 15 */
+				ndev->remote_gb_len = min_t(__u8,
+					(ntf.activation_params
+					.poll_nfc_dep.atr_res_len
+					- NFC_ATR_RES_GT_OFFSET),
+					NFC_MAX_GT_LEN);
+				memcpy(ndev->remote_gb,
+				       (ntf.activation_params.poll_nfc_dep
+				       .atr_res + NFC_ATR_RES_GT_OFFSET),
+				       ndev->remote_gb_len);
+			}
+		}
 	}
 
 	if (atomic_read(&ndev->state) == NCI_DISCOVERY) {
-- 
cgit v1.2.3


From 474fee3db16c63bc440bfb93b57f72ecfc4246f0 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 22 Aug 2012 16:22:16 -0700
Subject: NFC: Use system_nrt_wq instead of custom ones

NFC is using a number of custom ordered workqueues w/ WQ_MEM_RECLAIM.
WQ_MEM_RECLAIM is unnecessary unless NFC is gonna be used as transport
for storage device, and all use cases match one work item to one
ordered workqueue - IOW, there's no actual ordering going on at all
and using system_nrt_wq gives the same behavior.

There's nothing to be gained by using custom workqueues.  Use
system_nrt_wq instead and drop all the custom ones.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 include/net/nfc/hci.h   |  2 --
 include/net/nfc/nfc.h   |  1 -
 include/net/nfc/shdlc.h |  1 -
 net/nfc/core.c          | 13 ++---------
 net/nfc/hci/core.c      | 42 ++++++------------------------------
 net/nfc/hci/hcp.c       |  2 +-
 net/nfc/hci/shdlc.c     | 27 ++++++++---------------
 net/nfc/llcp/llcp.c     | 57 +++++++------------------------------------------
 net/nfc/llcp/llcp.h     |  3 ---
 9 files changed, 26 insertions(+), 122 deletions(-)

(limited to 'include')

diff --git a/include/net/nfc/hci.h b/include/net/nfc/hci.h
index f5169b04f082..6f065d563ec1 100644
--- a/include/net/nfc/hci.h
+++ b/include/net/nfc/hci.h
@@ -74,7 +74,6 @@ struct nfc_hci_dev {
 
 	struct list_head msg_tx_queue;
 
-	struct workqueue_struct *msg_tx_wq;
 	struct work_struct msg_tx_work;
 
 	struct timer_list cmd_timer;
@@ -82,7 +81,6 @@ struct nfc_hci_dev {
 
 	struct sk_buff_head rx_hcp_frags;
 
-	struct workqueue_struct *msg_rx_wq;
 	struct work_struct msg_rx_work;
 
 	struct sk_buff_head msg_rx_queue;
diff --git a/include/net/nfc/nfc.h b/include/net/nfc/nfc.h
index 7b9f71fa0ea7..bfbac732d8c6 100644
--- a/include/net/nfc/nfc.h
+++ b/include/net/nfc/nfc.h
@@ -113,7 +113,6 @@ struct nfc_dev {
 	int tx_tailroom;
 
 	struct timer_list check_pres_timer;
-	struct workqueue_struct *check_pres_wq;
 	struct work_struct check_pres_work;
 
 	struct nfc_ops *ops;
diff --git a/include/net/nfc/shdlc.h b/include/net/nfc/shdlc.h
index 35e930d2f638..342427362989 100644
--- a/include/net/nfc/shdlc.h
+++ b/include/net/nfc/shdlc.h
@@ -79,7 +79,6 @@ struct nfc_shdlc {
 
 	struct sk_buff_head ack_pending_q;
 
-	struct workqueue_struct *sm_wq;
 	struct work_struct sm_work;
 
 	struct nfc_shdlc_ops *ops;
diff --git a/net/nfc/core.c b/net/nfc/core.c
index ff749794bc5b..c9eacc1f145f 100644
--- a/net/nfc/core.c
+++ b/net/nfc/core.c
@@ -679,7 +679,7 @@ static void nfc_release(struct device *d)
 
 	if (dev->ops->check_presence) {
 		del_timer_sync(&dev->check_pres_timer);
-		destroy_workqueue(dev->check_pres_wq);
+		cancel_work_sync(&dev->check_pres_work);
 	}
 
 	nfc_genl_data_exit(&dev->genl_data);
@@ -715,7 +715,7 @@ static void nfc_check_pres_timeout(unsigned long data)
 {
 	struct nfc_dev *dev = (struct nfc_dev *)data;
 
-	queue_work(dev->check_pres_wq, &dev->check_pres_work);
+	queue_work(system_nrt_wq, &dev->check_pres_work);
 }
 
 struct class nfc_class = {
@@ -784,20 +784,11 @@ struct nfc_dev *nfc_allocate_device(struct nfc_ops *ops,
 	dev->targets_generation = 1;
 
 	if (ops->check_presence) {
-		char name[32];
 		init_timer(&dev->check_pres_timer);
 		dev->check_pres_timer.data = (unsigned long)dev;
 		dev->check_pres_timer.function = nfc_check_pres_timeout;
 
 		INIT_WORK(&dev->check_pres_work, nfc_check_pres_work);
-		snprintf(name, sizeof(name), "nfc%d_check_pres_wq", dev->idx);
-		dev->check_pres_wq = alloc_workqueue(name, WQ_NON_REENTRANT |
-						     WQ_UNBOUND |
-						     WQ_MEM_RECLAIM, 1);
-		if (dev->check_pres_wq == NULL) {
-			kfree(dev);
-			return NULL;
-		}
 	}
 
 	return dev;
diff --git a/net/nfc/hci/core.c b/net/nfc/hci/core.c
index 1ac7b3fac6c9..03646beb3a73 100644
--- a/net/nfc/hci/core.c
+++ b/net/nfc/hci/core.c
@@ -141,7 +141,7 @@ static void __nfc_hci_cmd_completion(struct nfc_hci_dev *hdev, int err,
 	kfree(hdev->cmd_pending_msg);
 	hdev->cmd_pending_msg = NULL;
 
-	queue_work(hdev->msg_tx_wq, &hdev->msg_tx_work);
+	queue_work(system_nrt_wq, &hdev->msg_tx_work);
 }
 
 void nfc_hci_resp_received(struct nfc_hci_dev *hdev, u8 result,
@@ -326,7 +326,7 @@ static void nfc_hci_cmd_timeout(unsigned long data)
 {
 	struct nfc_hci_dev *hdev = (struct nfc_hci_dev *)data;
 
-	queue_work(hdev->msg_tx_wq, &hdev->msg_tx_work);
+	queue_work(system_nrt_wq, &hdev->msg_tx_work);
 }
 
 static int hci_dev_connect_gates(struct nfc_hci_dev *hdev, u8 gate_count,
@@ -659,23 +659,11 @@ EXPORT_SYMBOL(nfc_hci_free_device);
 
 int nfc_hci_register_device(struct nfc_hci_dev *hdev)
 {
-	struct device *dev = &hdev->ndev->dev;
-	const char *devname = dev_name(dev);
-	char name[32];
-	int r = 0;
-
 	mutex_init(&hdev->msg_tx_mutex);
 
 	INIT_LIST_HEAD(&hdev->msg_tx_queue);
 
 	INIT_WORK(&hdev->msg_tx_work, nfc_hci_msg_tx_work);
-	snprintf(name, sizeof(name), "%s_hci_msg_tx_wq", devname);
-	hdev->msg_tx_wq = alloc_workqueue(name, WQ_NON_REENTRANT | WQ_UNBOUND |
-					  WQ_MEM_RECLAIM, 1);
-	if (hdev->msg_tx_wq == NULL) {
-		r = -ENOMEM;
-		goto exit;
-	}
 
 	init_timer(&hdev->cmd_timer);
 	hdev->cmd_timer.data = (unsigned long)hdev;
@@ -684,27 +672,10 @@ int nfc_hci_register_device(struct nfc_hci_dev *hdev)
 	skb_queue_head_init(&hdev->rx_hcp_frags);
 
 	INIT_WORK(&hdev->msg_rx_work, nfc_hci_msg_rx_work);
-	snprintf(name, sizeof(name), "%s_hci_msg_rx_wq", devname);
-	hdev->msg_rx_wq = alloc_workqueue(name, WQ_NON_REENTRANT | WQ_UNBOUND |
-					  WQ_MEM_RECLAIM, 1);
-	if (hdev->msg_rx_wq == NULL) {
-		r = -ENOMEM;
-		goto exit;
-	}
 
 	skb_queue_head_init(&hdev->msg_rx_queue);
 
-	r = nfc_register_device(hdev->ndev);
-
-exit:
-	if (r < 0) {
-		if (hdev->msg_tx_wq)
-			destroy_workqueue(hdev->msg_tx_wq);
-		if (hdev->msg_rx_wq)
-			destroy_workqueue(hdev->msg_rx_wq);
-	}
-
-	return r;
+	return nfc_register_device(hdev->ndev);
 }
 EXPORT_SYMBOL(nfc_hci_register_device);
 
@@ -725,9 +696,8 @@ void nfc_hci_unregister_device(struct nfc_hci_dev *hdev)
 
 	nfc_unregister_device(hdev->ndev);
 
-	destroy_workqueue(hdev->msg_tx_wq);
-
-	destroy_workqueue(hdev->msg_rx_wq);
+	cancel_work_sync(&hdev->msg_tx_work);
+	cancel_work_sync(&hdev->msg_rx_work);
 }
 EXPORT_SYMBOL(nfc_hci_unregister_device);
 
@@ -827,7 +797,7 @@ void nfc_hci_recv_frame(struct nfc_hci_dev *hdev, struct sk_buff *skb)
 		nfc_hci_hcp_message_rx(hdev, pipe, type, instruction, hcp_skb);
 	} else {
 		skb_queue_tail(&hdev->msg_rx_queue, hcp_skb);
-		queue_work(hdev->msg_rx_wq, &hdev->msg_rx_work);
+		queue_work(system_nrt_wq, &hdev->msg_rx_work);
 	}
 }
 EXPORT_SYMBOL(nfc_hci_recv_frame);
diff --git a/net/nfc/hci/hcp.c b/net/nfc/hci/hcp.c
index f4dad1a89740..2372b558abe9 100644
--- a/net/nfc/hci/hcp.c
+++ b/net/nfc/hci/hcp.c
@@ -108,7 +108,7 @@ int nfc_hci_hcp_message_tx(struct nfc_hci_dev *hdev, u8 pipe,
 	list_add_tail(&cmd->msg_l, &hdev->msg_tx_queue);
 	mutex_unlock(&hdev->msg_tx_mutex);
 
-	queue_work(hdev->msg_tx_wq, &hdev->msg_tx_work);
+	queue_work(system_nrt_wq, &hdev->msg_tx_work);
 
 	return 0;
 
diff --git a/net/nfc/hci/shdlc.c b/net/nfc/hci/shdlc.c
index 6f840c18c892..39b51eacc391 100644
--- a/net/nfc/hci/shdlc.c
+++ b/net/nfc/hci/shdlc.c
@@ -540,7 +540,7 @@ static void nfc_shdlc_connect_timeout(unsigned long data)
 
 	pr_debug("\n");
 
-	queue_work(shdlc->sm_wq, &shdlc->sm_work);
+	queue_work(system_nrt_wq, &shdlc->sm_work);
 }
 
 static void nfc_shdlc_t1_timeout(unsigned long data)
@@ -549,7 +549,7 @@ static void nfc_shdlc_t1_timeout(unsigned long data)
 
 	pr_debug("SoftIRQ: need to send ack\n");
 
-	queue_work(shdlc->sm_wq, &shdlc->sm_work);
+	queue_work(system_nrt_wq, &shdlc->sm_work);
 }
 
 static void nfc_shdlc_t2_timeout(unsigned long data)
@@ -558,7 +558,7 @@ static void nfc_shdlc_t2_timeout(unsigned long data)
 
 	pr_debug("SoftIRQ: need to retransmit\n");
 
-	queue_work(shdlc->sm_wq, &shdlc->sm_work);
+	queue_work(system_nrt_wq, &shdlc->sm_work);
 }
 
 static void nfc_shdlc_sm_work(struct work_struct *work)
@@ -598,7 +598,7 @@ static void nfc_shdlc_sm_work(struct work_struct *work)
 	case SHDLC_NEGOCIATING:
 		if (timer_pending(&shdlc->connect_timer) == 0) {
 			shdlc->state = SHDLC_CONNECTING;
-			queue_work(shdlc->sm_wq, &shdlc->sm_work);
+			queue_work(system_nrt_wq, &shdlc->sm_work);
 		}
 
 		nfc_shdlc_handle_rcv_queue(shdlc);
@@ -662,7 +662,7 @@ static int nfc_shdlc_connect(struct nfc_shdlc *shdlc)
 
 	mutex_unlock(&shdlc->state_mutex);
 
-	queue_work(shdlc->sm_wq, &shdlc->sm_work);
+	queue_work(system_nrt_wq, &shdlc->sm_work);
 
 	wait_event(connect_wq, shdlc->connect_result != 1);
 
@@ -679,7 +679,7 @@ static void nfc_shdlc_disconnect(struct nfc_shdlc *shdlc)
 
 	mutex_unlock(&shdlc->state_mutex);
 
-	queue_work(shdlc->sm_wq, &shdlc->sm_work);
+	queue_work(system_nrt_wq, &shdlc->sm_work);
 }
 
 /*
@@ -697,7 +697,7 @@ void nfc_shdlc_recv_frame(struct nfc_shdlc *shdlc, struct sk_buff *skb)
 		skb_queue_tail(&shdlc->rcv_q, skb);
 	}
 
-	queue_work(shdlc->sm_wq, &shdlc->sm_work);
+	queue_work(system_nrt_wq, &shdlc->sm_work);
 }
 EXPORT_SYMBOL(nfc_shdlc_recv_frame);
 
@@ -754,7 +754,7 @@ static int nfc_shdlc_xmit(struct nfc_hci_dev *hdev, struct sk_buff *skb)
 
 	skb_queue_tail(&shdlc->send_q, skb);
 
-	queue_work(shdlc->sm_wq, &shdlc->sm_work);
+	queue_work(system_nrt_wq, &shdlc->sm_work);
 
 	return 0;
 }
@@ -843,7 +843,6 @@ struct nfc_shdlc *nfc_shdlc_allocate(struct nfc_shdlc_ops *ops,
 {
 	struct nfc_shdlc *shdlc;
 	int r;
-	char name[32];
 
 	if (ops->xmit == NULL)
 		return NULL;
@@ -876,11 +875,6 @@ struct nfc_shdlc *nfc_shdlc_allocate(struct nfc_shdlc_ops *ops,
 	skb_queue_head_init(&shdlc->ack_pending_q);
 
 	INIT_WORK(&shdlc->sm_work, nfc_shdlc_sm_work);
-	snprintf(name, sizeof(name), "%s_shdlc_sm_wq", devname);
-	shdlc->sm_wq = alloc_workqueue(name, WQ_NON_REENTRANT | WQ_UNBOUND |
-				       WQ_MEM_RECLAIM, 1);
-	if (shdlc->sm_wq == NULL)
-		goto err_allocwq;
 
 	shdlc->client_headroom = tx_headroom;
 	shdlc->client_tailroom = tx_tailroom;
@@ -904,9 +898,6 @@ err_regdev:
 	nfc_hci_free_device(shdlc->hdev);
 
 err_allocdev:
-	destroy_workqueue(shdlc->sm_wq);
-
-err_allocwq:
 	kfree(shdlc);
 
 	return NULL;
@@ -920,7 +911,7 @@ void nfc_shdlc_free(struct nfc_shdlc *shdlc)
 	nfc_hci_unregister_device(shdlc->hdev);
 	nfc_hci_free_device(shdlc->hdev);
 
-	destroy_workqueue(shdlc->sm_wq);
+	cancel_work_sync(&shdlc->sm_work);
 
 	skb_queue_purge(&shdlc->rcv_q);
 	skb_queue_purge(&shdlc->send_q);
diff --git a/net/nfc/llcp/llcp.c b/net/nfc/llcp/llcp.c
index 82f0f7588b46..6f368412ffd2 100644
--- a/net/nfc/llcp/llcp.c
+++ b/net/nfc/llcp/llcp.c
@@ -114,9 +114,9 @@ static void local_release(struct kref *ref)
 	nfc_llcp_socket_release(local, false);
 	del_timer_sync(&local->link_timer);
 	skb_queue_purge(&local->tx_queue);
-	destroy_workqueue(local->tx_wq);
-	destroy_workqueue(local->rx_wq);
-	destroy_workqueue(local->timeout_wq);
+	cancel_work_sync(&local->tx_work);
+	cancel_work_sync(&local->rx_work);
+	cancel_work_sync(&local->timeout_work);
 	kfree_skb(local->rx_pending);
 	kfree(local);
 }
@@ -181,7 +181,7 @@ static void nfc_llcp_symm_timer(unsigned long data)
 
 	pr_err("SYMM timeout\n");
 
-	queue_work(local->timeout_wq, &local->timeout_work);
+	queue_work(system_nrt_wq, &local->timeout_work);
 }
 
 struct nfc_llcp_local *nfc_llcp_find_local(struct nfc_dev *dev)
@@ -1052,7 +1052,7 @@ static void nfc_llcp_rx_work(struct work_struct *work)
 
 	}
 
-	queue_work(local->tx_wq, &local->tx_work);
+	queue_work(system_nrt_wq, &local->tx_work);
 	kfree_skb(local->rx_pending);
 	local->rx_pending = NULL;
 
@@ -1071,7 +1071,7 @@ void nfc_llcp_recv(void *data, struct sk_buff *skb, int err)
 
 	local->rx_pending = skb_get(skb);
 	del_timer(&local->link_timer);
-	queue_work(local->rx_wq, &local->rx_work);
+	queue_work(system_nrt_wq, &local->rx_work);
 
 	return;
 }
@@ -1086,7 +1086,7 @@ int nfc_llcp_data_received(struct nfc_dev *dev, struct sk_buff *skb)
 
 	local->rx_pending = skb_get(skb);
 	del_timer(&local->link_timer);
-	queue_work(local->rx_wq, &local->rx_work);
+	queue_work(system_nrt_wq, &local->rx_work);
 
 	return 0;
 }
@@ -1121,7 +1121,7 @@ void nfc_llcp_mac_is_up(struct nfc_dev *dev, u32 target_idx,
 	if (rf_mode == NFC_RF_INITIATOR) {
 		pr_debug("Queueing Tx work\n");
 
-		queue_work(local->tx_wq, &local->tx_work);
+		queue_work(system_nrt_wq, &local->tx_work);
 	} else {
 		mod_timer(&local->link_timer,
 			  jiffies + msecs_to_jiffies(local->remote_lto));
@@ -1130,10 +1130,7 @@ void nfc_llcp_mac_is_up(struct nfc_dev *dev, u32 target_idx,
 
 int nfc_llcp_register_device(struct nfc_dev *ndev)
 {
-	struct device *dev = &ndev->dev;
 	struct nfc_llcp_local *local;
-	char name[32];
-	int err;
 
 	local = kzalloc(sizeof(struct nfc_llcp_local), GFP_KERNEL);
 	if (local == NULL)
@@ -1149,38 +1146,11 @@ int nfc_llcp_register_device(struct nfc_dev *ndev)
 
 	skb_queue_head_init(&local->tx_queue);
 	INIT_WORK(&local->tx_work, nfc_llcp_tx_work);
-	snprintf(name, sizeof(name), "%s_llcp_tx_wq", dev_name(dev));
-	local->tx_wq =
-		alloc_workqueue(name,
-				WQ_NON_REENTRANT | WQ_UNBOUND | WQ_MEM_RECLAIM,
-				1);
-	if (local->tx_wq == NULL) {
-		err = -ENOMEM;
-		goto err_local;
-	}
 
 	local->rx_pending = NULL;
 	INIT_WORK(&local->rx_work, nfc_llcp_rx_work);
-	snprintf(name, sizeof(name), "%s_llcp_rx_wq", dev_name(dev));
-	local->rx_wq =
-		alloc_workqueue(name,
-				WQ_NON_REENTRANT | WQ_UNBOUND | WQ_MEM_RECLAIM,
-				1);
-	if (local->rx_wq == NULL) {
-		err = -ENOMEM;
-		goto err_tx_wq;
-	}
 
 	INIT_WORK(&local->timeout_work, nfc_llcp_timeout_work);
-	snprintf(name, sizeof(name), "%s_llcp_timeout_wq", dev_name(dev));
-	local->timeout_wq =
-		alloc_workqueue(name,
-				WQ_NON_REENTRANT | WQ_UNBOUND | WQ_MEM_RECLAIM,
-				1);
-	if (local->timeout_wq == NULL) {
-		err = -ENOMEM;
-		goto err_rx_wq;
-	}
 
 	local->sockets.lock = __RW_LOCK_UNLOCKED(local->sockets.lock);
 	local->connecting_sockets.lock = __RW_LOCK_UNLOCKED(local->connecting_sockets.lock);
@@ -1192,17 +1162,6 @@ int nfc_llcp_register_device(struct nfc_dev *ndev)
 
 	list_add(&llcp_devices, &local->list);
 
-	return 0;
-
-err_rx_wq:
-	destroy_workqueue(local->rx_wq);
-
-err_tx_wq:
-	destroy_workqueue(local->tx_wq);
-
-err_local:
-	kfree(local);
-
 	return 0;
 }
 
diff --git a/net/nfc/llcp/llcp.h b/net/nfc/llcp/llcp.h
index 83b8bba5a280..af395c9ceb03 100644
--- a/net/nfc/llcp/llcp.h
+++ b/net/nfc/llcp/llcp.h
@@ -56,12 +56,9 @@ struct nfc_llcp_local {
 
 	struct timer_list link_timer;
 	struct sk_buff_head tx_queue;
-	struct workqueue_struct	*tx_wq;
 	struct work_struct	 tx_work;
-	struct workqueue_struct	*rx_wq;
 	struct work_struct	 rx_work;
 	struct sk_buff *rx_pending;
-	struct workqueue_struct	*timeout_wq;
 	struct work_struct	 timeout_work;
 
 	u32 target_idx;
-- 
cgit v1.2.3


From e4c4789e55327e5f2bd6cafcccd46f9b6251bbc3 Mon Sep 17 00:00:00 2001
From: Eric Lapuyade <eric.lapuyade@linux.intel.com>
Date: Tue, 11 Sep 2012 10:42:54 +0200
Subject: NFC: Add a public nfc_hci_send_cmd_async method

This method initiates execution of an HCI cmd. Result will be delivered
through an asynchronous callback.

Signed-off-by: Eric Lapuyade <eric.lapuyade@intel.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 include/net/nfc/hci.h |  3 +++
 net/nfc/hci/command.c | 34 +++++++++++++++++++++++++++++++++-
 2 files changed, 36 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/nfc/hci.h b/include/net/nfc/hci.h
index 6f065d563ec1..54ba2caab4a6 100644
--- a/include/net/nfc/hci.h
+++ b/include/net/nfc/hci.h
@@ -200,6 +200,9 @@ int nfc_hci_set_param(struct nfc_hci_dev *hdev, u8 gate, u8 idx,
 		      const u8 *param, size_t param_len);
 int nfc_hci_send_cmd(struct nfc_hci_dev *hdev, u8 gate, u8 cmd,
 		     const u8 *param, size_t param_len, struct sk_buff **skb);
+int nfc_hci_send_cmd_async(struct nfc_hci_dev *hdev, u8 gate, u8 cmd,
+			   const u8 *param, size_t param_len,
+			   data_exchange_cb_t cb, void *cb_context);
 int nfc_hci_send_response(struct nfc_hci_dev *hdev, u8 gate, u8 response,
 			  const u8 *param, size_t param_len);
 int nfc_hci_send_event(struct nfc_hci_dev *hdev, u8 gate, u8 event,
diff --git a/net/nfc/hci/command.c b/net/nfc/hci/command.c
index 15e21093c7a5..71c6a7086b8f 100644
--- a/net/nfc/hci/command.c
+++ b/net/nfc/hci/command.c
@@ -28,6 +28,20 @@
 
 #include "hci.h"
 
+static int nfc_hci_execute_cmd_async(struct nfc_hci_dev *hdev, u8 pipe, u8 cmd,
+			       const u8 *param, size_t param_len,
+			       data_exchange_cb_t cb, void *cb_context)
+{
+	pr_debug("exec cmd async through pipe=%d, cmd=%d, plen=%zd\n", pipe,
+		 cmd, param_len);
+
+	/* TODO: Define hci cmd execution delay. Should it be the same
+	 * for all commands?
+	 */
+	return nfc_hci_hcp_message_tx(hdev, pipe, NFC_HCI_HCP_COMMAND, cmd,
+				      param, param_len, cb, cb_context, 3000);
+}
+
 /*
  * HCI command execution completion callback.
  * err will be a standard linux error (may be converted from HCI response)
@@ -60,7 +74,8 @@ static int nfc_hci_execute_cmd(struct nfc_hci_dev *hdev, u8 pipe, u8 cmd,
 	hcp_ew.exec_complete = false;
 	hcp_ew.result_skb = NULL;
 
-	pr_debug("through pipe=%d, cmd=%d, plen=%zd\n", pipe, cmd, param_len);
+	pr_debug("exec cmd sync through pipe=%d, cmd=%d, plen=%zd\n", pipe,
+		 cmd, param_len);
 
 	/* TODO: Define hci cmd execution delay. Should it be the same
 	 * for all commands?
@@ -138,6 +153,23 @@ int nfc_hci_send_cmd(struct nfc_hci_dev *hdev, u8 gate, u8 cmd,
 }
 EXPORT_SYMBOL(nfc_hci_send_cmd);
 
+int nfc_hci_send_cmd_async(struct nfc_hci_dev *hdev, u8 gate, u8 cmd,
+			   const u8 *param, size_t param_len,
+			   data_exchange_cb_t cb, void *cb_context)
+{
+	u8 pipe;
+
+	pr_debug("\n");
+
+	pipe = hdev->gate2pipe[gate];
+	if (pipe == NFC_HCI_INVALID_PIPE)
+		return -EADDRNOTAVAIL;
+
+	return nfc_hci_execute_cmd_async(hdev, pipe, cmd, param, param_len,
+					 cb, cb_context);
+}
+EXPORT_SYMBOL(nfc_hci_send_cmd_async);
+
 int nfc_hci_set_param(struct nfc_hci_dev *hdev, u8 gate, u8 idx,
 		      const u8 *param, size_t param_len)
 {
-- 
cgit v1.2.3


From f3e8fb552789f4845e60b11c47b676d14b9488e5 Mon Sep 17 00:00:00 2001
From: Eric Lapuyade <eric.lapuyade@linux.intel.com>
Date: Tue, 11 Sep 2012 10:43:50 +0200
Subject: NFC: Modified hci_transceive to become an asynchronous operation

This enables the completion callback to be called from a different
context, preventing a possible deadlock if the callback resulted in the
invocation of a nested call to the currently locked nfc_dev.
This is also more in line with the im_transceive nfc_ops for NFC Core or
NCI drivers which already behave asynchronously.

Signed-off-by: Eric Lapuyade <eric.lapuyade@intel.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/nfc/pn544_hci.c | 57 ++++++++++++++++++++++++++++++++++++-------------
 include/net/nfc/hci.h   |  8 +++++--
 include/net/nfc/shdlc.h |  4 ++--
 net/nfc/hci/core.c      | 57 +++++++++++++++++++++++++++++++++----------------
 net/nfc/hci/shdlc.c     |  5 +++--
 5 files changed, 92 insertions(+), 39 deletions(-)

(limited to 'include')

diff --git a/drivers/nfc/pn544_hci.c b/drivers/nfc/pn544_hci.c
index d90aecfce739..a176d1f7bbf3 100644
--- a/drivers/nfc/pn544_hci.c
+++ b/drivers/nfc/pn544_hci.c
@@ -148,6 +148,9 @@ struct pn544_hci_info {
 				 * < 0 if hardware error occured (e.g. i2c err)
 				 * and prevents normal operation.
 				 */
+	int async_cb_type;
+	data_exchange_cb_t async_cb;
+	void *async_cb_context;
 };
 
 static void pn544_hci_platform_init(struct pn544_hci_info *info)
@@ -731,6 +734,26 @@ static int pn544_hci_complete_target_discovered(struct nfc_shdlc *shdlc,
 	return r;
 }
 
+#define PN544_CB_TYPE_READER_F 1
+
+static void pn544_hci_data_exchange_cb(void *context, struct sk_buff *skb,
+				       int err)
+{
+	struct pn544_hci_info *info = context;
+
+	switch (info->async_cb_type) {
+	case PN544_CB_TYPE_READER_F:
+		if (err == 0)
+			skb_pull(skb, 1);
+		info->async_cb(info->async_cb_context, skb, err);
+		break;
+	default:
+		if (err == 0)
+			kfree_skb(skb);
+		break;
+	}
+}
+
 #define MIFARE_CMD_AUTH_KEY_A	0x60
 #define MIFARE_CMD_AUTH_KEY_B	0x61
 #define MIFARE_CMD_HEADER	2
@@ -744,11 +767,11 @@ static int pn544_hci_complete_target_discovered(struct nfc_shdlc *shdlc,
  */
 static int pn544_hci_data_exchange(struct nfc_shdlc *shdlc,
 				   struct nfc_target *target,
-				   struct sk_buff *skb,
-				   struct sk_buff **res_skb)
+				   struct sk_buff *skb, data_exchange_cb_t cb,
+				   void *cb_context)
 {
+	struct pn544_hci_info *info = nfc_shdlc_get_clientdata(shdlc);
 	struct nfc_hci_dev *hdev = nfc_shdlc_get_hci_dev(shdlc);
-	int r;
 
 	pr_info(DRIVER_DESC ": %s for gate=%d\n", __func__,
 		target->hci_reader_gate);
@@ -773,25 +796,29 @@ static int pn544_hci_data_exchange(struct nfc_shdlc *shdlc,
 				memcpy(data, uid, MIFARE_UID_LEN);
 			}
 
-			return nfc_hci_send_cmd(hdev, target->hci_reader_gate,
-						PN544_MIFARE_CMD,
-						skb->data, skb->len, res_skb);
+			return nfc_hci_send_cmd_async(hdev,
+						      target->hci_reader_gate,
+						      PN544_MIFARE_CMD,
+						      skb->data, skb->len,
+						      cb, cb_context);
 		} else
 			return 1;
 	case PN544_RF_READER_F_GATE:
 		*skb_push(skb, 1) = 0;
 		*skb_push(skb, 1) = 0;
 
-		r = nfc_hci_send_cmd(hdev, target->hci_reader_gate,
-				     PN544_FELICA_RAW,
-				     skb->data, skb->len, res_skb);
-		if (r == 0)
-			skb_pull(*res_skb, 1);
-		return r;
+		info->async_cb_type = PN544_CB_TYPE_READER_F;
+		info->async_cb = cb;
+		info->async_cb_context = cb_context;
+
+		return nfc_hci_send_cmd_async(hdev, target->hci_reader_gate,
+					      PN544_FELICA_RAW, skb->data,
+					      skb->len,
+					      pn544_hci_data_exchange_cb, info);
 	case PN544_RF_READER_JEWEL_GATE:
-		return nfc_hci_send_cmd(hdev, target->hci_reader_gate,
-					PN544_JEWEL_RAW_CMD,
-					skb->data, skb->len, res_skb);
+		return nfc_hci_send_cmd_async(hdev, target->hci_reader_gate,
+					      PN544_JEWEL_RAW_CMD, skb->data,
+					      skb->len, cb, cb_context);
 	default:
 		return 1;
 	}
diff --git a/include/net/nfc/hci.h b/include/net/nfc/hci.h
index 54ba2caab4a6..6cee6e21fc46 100644
--- a/include/net/nfc/hci.h
+++ b/include/net/nfc/hci.h
@@ -38,8 +38,8 @@ struct nfc_hci_ops {
 	int (*complete_target_discovered) (struct nfc_hci_dev *hdev, u8 gate,
 					   struct nfc_target *target);
 	int (*data_exchange) (struct nfc_hci_dev *hdev,
-			      struct nfc_target *target,
-			      struct sk_buff *skb, struct sk_buff **res_skb);
+			      struct nfc_target *target, struct sk_buff *skb,
+			      data_exchange_cb_t cb, void *cb_context);
 	int (*check_presence)(struct nfc_hci_dev *hdev,
 			      struct nfc_target *target);
 };
@@ -103,6 +103,10 @@ struct nfc_hci_dev {
 	u8 hw_mpw;
 	u8 hw_software;
 	u8 hw_bsid;
+
+	int async_cb_type;
+	data_exchange_cb_t async_cb;
+	void *async_cb_context;
 };
 
 /* hci device allocation */
diff --git a/include/net/nfc/shdlc.h b/include/net/nfc/shdlc.h
index 342427362989..fe5e19829352 100644
--- a/include/net/nfc/shdlc.h
+++ b/include/net/nfc/shdlc.h
@@ -34,8 +34,8 @@ struct nfc_shdlc_ops {
 	int (*complete_target_discovered) (struct nfc_shdlc *shdlc, u8 gate,
 					   struct nfc_target *target);
 	int (*data_exchange) (struct nfc_shdlc *shdlc,
-			      struct nfc_target *target,
-			      struct sk_buff *skb, struct sk_buff **res_skb);
+			      struct nfc_target *target, struct sk_buff *skb,
+			      data_exchange_cb_t cb, void *cb_context);
 	int (*check_presence)(struct nfc_shdlc *shdlc,
 			      struct nfc_target *target);
 };
diff --git a/net/nfc/hci/core.c b/net/nfc/hci/core.c
index e387c86e0cc7..dc57e3dc15a4 100644
--- a/net/nfc/hci/core.c
+++ b/net/nfc/hci/core.c
@@ -537,13 +537,37 @@ static void hci_deactivate_target(struct nfc_dev *nfc_dev,
 {
 }
 
+#define HCI_CB_TYPE_TRANSCEIVE 1
+
+static void hci_transceive_cb(void *context, struct sk_buff *skb, int err)
+{
+	struct nfc_hci_dev *hdev = context;
+
+	switch (hdev->async_cb_type) {
+	case HCI_CB_TYPE_TRANSCEIVE:
+		/*
+		 * TODO: Check RF Error indicator to make sure data is valid.
+		 * It seems that HCI cmd can complete without error, but data
+		 * can be invalid if an RF error occured? Ignore for now.
+		 */
+		if (err == 0)
+			skb_trim(skb, skb->len - 1); /* RF Err ind */
+
+		hdev->async_cb(hdev->async_cb_context, skb, err);
+		break;
+	default:
+		if (err == 0)
+			kfree_skb(skb);
+		break;
+	}
+}
+
 static int hci_transceive(struct nfc_dev *nfc_dev, struct nfc_target *target,
 			  struct sk_buff *skb, data_exchange_cb_t cb,
 			  void *cb_context)
 {
 	struct nfc_hci_dev *hdev = nfc_get_drvdata(nfc_dev);
 	int r;
-	struct sk_buff *res_skb = NULL;
 
 	pr_debug("target_idx=%d\n", target->idx);
 
@@ -551,40 +575,37 @@ static int hci_transceive(struct nfc_dev *nfc_dev, struct nfc_target *target,
 	case NFC_HCI_RF_READER_A_GATE:
 	case NFC_HCI_RF_READER_B_GATE:
 		if (hdev->ops->data_exchange) {
-			r = hdev->ops->data_exchange(hdev, target, skb,
-						     &res_skb);
+			r = hdev->ops->data_exchange(hdev, target, skb, cb,
+						     cb_context);
 			if (r <= 0)	/* handled */
 				break;
 		}
 
 		*skb_push(skb, 1) = 0;	/* CTR, see spec:10.2.2.1 */
-		r = nfc_hci_send_cmd(hdev, target->hci_reader_gate,
-				     NFC_HCI_WR_XCHG_DATA,
-				     skb->data, skb->len, &res_skb);
-		/*
-		 * TODO: Check RF Error indicator to make sure data is valid.
-		 * It seems that HCI cmd can complete without error, but data
-		 * can be invalid if an RF error occured? Ignore for now.
-		 */
-		if (r == 0)
-			skb_trim(res_skb, res_skb->len - 1); /* RF Err ind */
+
+		hdev->async_cb_type = HCI_CB_TYPE_TRANSCEIVE;
+		hdev->async_cb = cb;
+		hdev->async_cb_context = cb_context;
+
+		r = nfc_hci_send_cmd_async(hdev, target->hci_reader_gate,
+					   NFC_HCI_WR_XCHG_DATA, skb->data,
+					   skb->len, hci_transceive_cb, hdev);
 		break;
 	default:
 		if (hdev->ops->data_exchange) {
-			r = hdev->ops->data_exchange(hdev, target, skb,
-						     &res_skb);
+			r = hdev->ops->data_exchange(hdev, target, skb, cb,
+						     cb_context);
 			if (r == 1)
 				r = -ENOTSUPP;
 		}
 		else
 			r = -ENOTSUPP;
+		break;
 	}
 
 	kfree_skb(skb);
 
-	cb(cb_context, res_skb, r);
-
-	return 0;
+	return r;
 }
 
 static int hci_check_presence(struct nfc_dev *nfc_dev,
diff --git a/net/nfc/hci/shdlc.c b/net/nfc/hci/shdlc.c
index 9357ba7362f6..c63af7d3e859 100644
--- a/net/nfc/hci/shdlc.c
+++ b/net/nfc/hci/shdlc.c
@@ -777,12 +777,13 @@ static int nfc_shdlc_complete_target_discovered(struct nfc_hci_dev *hdev,
 static int nfc_shdlc_data_exchange(struct nfc_hci_dev *hdev,
 				   struct nfc_target *target,
 				   struct sk_buff *skb,
-				   struct sk_buff **res_skb)
+				   data_exchange_cb_t cb, void *cb_context)
 {
 	struct nfc_shdlc *shdlc = nfc_hci_get_clientdata(hdev);
 
 	if (shdlc->ops->data_exchange)
-		return shdlc->ops->data_exchange(shdlc, target, skb, res_skb);
+		return shdlc->ops->data_exchange(shdlc, target, skb, cb,
+						 cb_context);
 
 	return -EPERM;
 }
-- 
cgit v1.2.3


From 67cccfe17d1b3da1ed6c79e643c9be95ebde9642 Mon Sep 17 00:00:00 2001
From: Eric Lapuyade <eric.lapuyade@linux.intel.com>
Date: Thu, 13 Sep 2012 17:10:00 +0200
Subject: NFC: Add an LLC Core layer to HCI

The LLC layer manages modules that control the link layer protocol (such
as shdlc) between HCI and an HCI driver. The driver must simply specify
the required llc when it registers with HCI.

Signed-off-by: Eric Lapuyade <eric.lapuyade@intel.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 include/net/nfc/llc.h |  51 +++++++++++++++
 net/nfc/hci/Makefile  |   2 +-
 net/nfc/hci/core.c    |  14 +++++
 net/nfc/hci/llc.c     | 168 ++++++++++++++++++++++++++++++++++++++++++++++++++
 net/nfc/hci/llc.h     |  58 +++++++++++++++++
 5 files changed, 292 insertions(+), 1 deletion(-)
 create mode 100644 include/net/nfc/llc.h
 create mode 100644 net/nfc/hci/llc.c
 create mode 100644 net/nfc/hci/llc.h

(limited to 'include')

diff --git a/include/net/nfc/llc.h b/include/net/nfc/llc.h
new file mode 100644
index 000000000000..98df903f8b7d
--- /dev/null
+++ b/include/net/nfc/llc.h
@@ -0,0 +1,51 @@
+/*
+ * Link Layer Control manager public interface
+ *
+ * Copyright (C) 2012  Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the
+ * Free Software Foundation, Inc.,
+ * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+#ifndef __NFC_LLC_H_
+#define __NFC_LLC_H_
+
+#include <net/nfc/hci.h>
+#include <linux/skbuff.h>
+
+typedef void (*rcv_to_hci_t) (struct nfc_hci_dev *hdev, struct sk_buff *skb);
+typedef int (*xmit_to_drv_t) (struct nfc_hci_dev *hdev, struct sk_buff *skb);
+typedef void (*llc_failure_t) (struct nfc_hci_dev *hdev, int err);
+
+struct nfc_llc;
+
+struct nfc_llc *nfc_llc_allocate(const char *name, struct nfc_hci_dev *hdev,
+				 xmit_to_drv_t xmit_to_drv,
+				 rcv_to_hci_t rcv_to_hci, int tx_headroom,
+				 int tx_tailroom, llc_failure_t llc_failure);
+void nfc_llc_free(struct nfc_llc *llc);
+
+void nfc_llc_get_rx_head_tail_room(struct nfc_llc *llc, int *rx_headroom,
+				   int *rx_tailroom);
+
+
+int nfc_llc_start(struct nfc_llc *llc);
+int nfc_llc_stop(struct nfc_llc *llc);
+void nfc_llc_rcv_from_drv(struct nfc_llc *llc, struct sk_buff *skb);
+int nfc_llc_xmit_from_hci(struct nfc_llc *llc, struct sk_buff *skb);
+
+int nfc_llc_init(void);
+void nfc_llc_exit(void);
+
+#endif /* __NFC_LLC_H_ */
diff --git a/net/nfc/hci/Makefile b/net/nfc/hci/Makefile
index f9c44b2fb065..b44686b581af 100644
--- a/net/nfc/hci/Makefile
+++ b/net/nfc/hci/Makefile
@@ -4,5 +4,5 @@
 
 obj-$(CONFIG_NFC_HCI) += hci.o
 
-hci-y			:= core.o hcp.o command.o
+hci-y			:= core.o hcp.o command.o llc.o
 hci-$(CONFIG_NFC_SHDLC)	+= shdlc.o
diff --git a/net/nfc/hci/core.c b/net/nfc/hci/core.c
index dc57e3dc15a4..069e2d6056e5 100644
--- a/net/nfc/hci/core.c
+++ b/net/nfc/hci/core.c
@@ -26,6 +26,7 @@
 
 #include <net/nfc/nfc.h>
 #include <net/nfc/hci.h>
+#include <net/nfc/llc.h>
 
 #include "hci.h"
 
@@ -821,4 +822,17 @@ void nfc_hci_recv_frame(struct nfc_hci_dev *hdev, struct sk_buff *skb)
 }
 EXPORT_SYMBOL(nfc_hci_recv_frame);
 
+static int __init nfc_hci_init(void)
+{
+	return nfc_llc_init();
+}
+
+static void __exit nfc_hci_exit(void)
+{
+	nfc_llc_exit();
+}
+
+module_init(nfc_hci_init);
+module_exit(nfc_hci_exit);
+
 MODULE_LICENSE("GPL");
diff --git a/net/nfc/hci/llc.c b/net/nfc/hci/llc.c
new file mode 100644
index 000000000000..73c42785ce84
--- /dev/null
+++ b/net/nfc/hci/llc.c
@@ -0,0 +1,168 @@
+/*
+ * Link Layer Control manager
+ *
+ * Copyright (C) 2012  Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the
+ * Free Software Foundation, Inc.,
+ * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+#include <linux/export.h>
+#include <net/nfc/llc.h>
+#include "llc.h"
+
+static struct list_head llc_engines;
+
+int nfc_llc_init(void)
+{
+	INIT_LIST_HEAD(&llc_engines);
+
+	return 0;
+}
+EXPORT_SYMBOL(nfc_llc_init);
+
+void nfc_llc_exit(void)
+{
+	struct nfc_llc_engine *llc_engine, *n;
+
+	list_for_each_entry_safe(llc_engine, n, &llc_engines, entry) {
+		list_del(&llc_engine->entry);
+		kfree(llc_engine->name);
+		kfree(llc_engine);
+	}
+}
+EXPORT_SYMBOL(nfc_llc_exit);
+
+int nfc_llc_register(const char *name, struct nfc_llc_ops *ops)
+{
+	struct nfc_llc_engine *llc_engine;
+
+	llc_engine = kzalloc(sizeof(struct nfc_llc_engine), GFP_KERNEL);
+	if (llc_engine == NULL)
+		return -ENOMEM;
+
+	llc_engine->name = kstrdup(name, GFP_KERNEL);
+	if (llc_engine->name == NULL) {
+		kfree(llc_engine);
+		return -ENOMEM;
+	}
+	llc_engine->ops = ops;
+
+	INIT_LIST_HEAD(&llc_engine->entry);
+	list_add_tail (&llc_engine->entry, &llc_engines);
+
+	return 0;
+}
+EXPORT_SYMBOL(nfc_llc_register);
+
+static struct nfc_llc_engine *nfc_llc_name_to_engine(const char *name)
+{
+	struct nfc_llc_engine *llc_engine;
+
+	list_for_each_entry(llc_engine, &llc_engines, entry) {
+		if (strcmp(llc_engine->name, name) == 0)
+			return llc_engine;
+	}
+
+	return NULL;
+}
+
+void nfc_llc_unregister(const char *name)
+{
+	struct nfc_llc_engine *llc_engine;
+
+	llc_engine = nfc_llc_name_to_engine(name);
+	if (llc_engine == NULL)
+		return;
+
+	list_del(&llc_engine->entry);
+	kfree(llc_engine->name);
+	kfree(llc_engine);
+}
+EXPORT_SYMBOL(nfc_llc_unregister);
+
+struct nfc_llc *nfc_llc_allocate(const char *name, struct nfc_hci_dev *hdev,
+				 xmit_to_drv_t xmit_to_drv,
+				 rcv_to_hci_t rcv_to_hci, int tx_headroom,
+				 int tx_tailroom, llc_failure_t llc_failure)
+{
+	struct nfc_llc_engine *llc_engine;
+	struct nfc_llc *llc;
+
+	llc_engine = nfc_llc_name_to_engine(name);
+	if (llc_engine == NULL)
+		return NULL;
+
+	llc = kzalloc(sizeof(struct nfc_llc), GFP_KERNEL);
+	if (llc == NULL)
+		return NULL;
+
+	llc->data = llc_engine->ops->init(hdev, xmit_to_drv, rcv_to_hci,
+					  tx_headroom, tx_tailroom,
+					  &llc->rx_headroom, &llc->rx_tailroom,
+					  llc_failure);
+	if (llc->data == NULL) {
+		kfree(llc);
+		return NULL;
+	}
+	llc->ops = llc_engine->ops;
+
+	return llc;
+}
+EXPORT_SYMBOL(nfc_llc_allocate);
+
+void nfc_llc_free(struct nfc_llc *llc)
+{
+	llc->ops->deinit(llc);
+	kfree(llc);
+}
+EXPORT_SYMBOL(nfc_llc_free);
+
+inline void nfc_llc_get_rx_head_tail_room(struct nfc_llc *llc, int *rx_headroom,
+					  int *rx_tailroom)
+{
+	*rx_headroom = llc->rx_headroom;
+	*rx_tailroom = llc->rx_tailroom;
+}
+EXPORT_SYMBOL(nfc_llc_get_rx_head_tail_room);
+
+inline int nfc_llc_start(struct nfc_llc *llc)
+{
+	return llc->ops->start(llc);
+}
+EXPORT_SYMBOL(nfc_llc_start);
+
+inline int nfc_llc_stop(struct nfc_llc *llc)
+{
+	return llc->ops->stop(llc);
+}
+EXPORT_SYMBOL(nfc_llc_stop);
+
+inline void nfc_llc_rcv_from_drv(struct nfc_llc *llc, struct sk_buff *skb)
+{
+	llc->ops->rcv_from_drv(llc, skb);
+}
+EXPORT_SYMBOL(nfc_llc_rcv_from_drv);
+
+inline int nfc_llc_xmit_from_hci(struct nfc_llc *llc, struct sk_buff *skb)
+{
+	return llc->ops->xmit_from_hci(llc, skb);
+}
+EXPORT_SYMBOL(nfc_llc_xmit_from_hci);
+
+inline void *nfc_llc_get_data(struct nfc_llc *llc)
+{
+	return llc->data;
+}
+EXPORT_SYMBOL(nfc_llc_get_data);
diff --git a/net/nfc/hci/llc.h b/net/nfc/hci/llc.h
new file mode 100644
index 000000000000..b2c7285b0309
--- /dev/null
+++ b/net/nfc/hci/llc.h
@@ -0,0 +1,58 @@
+/*
+ * Link Layer Control manager
+ *
+ * Copyright (C) 2012  Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the
+ * Free Software Foundation, Inc.,
+ * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+#ifndef __LOCAL_LLC_H_
+#define __LOCAL_LLC_H_
+
+#include <net/nfc/hci.h>
+#include <net/nfc/llc.h>
+#include <linux/skbuff.h>
+
+struct nfc_llc_ops {
+	void *(*init) (struct nfc_hci_dev *hdev, xmit_to_drv_t xmit_to_drv,
+		       rcv_to_hci_t rcv_to_hci, int tx_headroom,
+		       int tx_tailroom, int *rx_headroom, int *rx_tailroom,
+		       llc_failure_t llc_failure);
+	void (*deinit) (struct nfc_llc *llc);
+	int (*start) (struct nfc_llc *llc);
+	int (*stop) (struct nfc_llc *llc);
+	void (*rcv_from_drv) (struct nfc_llc *llc, struct sk_buff *skb);
+	int (*xmit_from_hci) (struct nfc_llc *llc, struct sk_buff *skb);
+};
+
+struct nfc_llc_engine {
+	const char *name;
+	struct nfc_llc_ops *ops;
+	struct list_head entry;
+};
+
+struct nfc_llc {
+	void *data;
+	struct nfc_llc_ops *ops;
+	int rx_headroom;
+	int rx_tailroom;
+};
+
+void *nfc_llc_get_data(struct nfc_llc *llc);
+
+int nfc_llc_register(const char *name, struct nfc_llc_ops *ops);
+void nfc_llc_unregister(const char *name);
+
+#endif /* __LOCAL_LLC_H_ */
-- 
cgit v1.2.3


From 8af00d48dc929442644bf68e9cd3d951d9697296 Mon Sep 17 00:00:00 2001
From: Eric Lapuyade <eric.lapuyade@linux.intel.com>
Date: Thu, 13 Sep 2012 17:10:48 +0200
Subject: NFC: Add a nop (passthrough) llc module to llc core

This is a passthrough llc. It can be used by HCI drivers that don't
need link layer control. HCI will then write directly to the driver, and
driver will deliver incoming frames directly to HCI without any
processing.

Signed-off-by: Eric Lapuyade <eric.lapuyade@intel.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 include/net/nfc/llc.h |   2 +
 net/nfc/hci/Makefile  |   2 +-
 net/nfc/hci/llc.c     |   2 +-
 net/nfc/hci/llc.h     |   2 +
 net/nfc/hci/llc_nop.c | 101 ++++++++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 107 insertions(+), 2 deletions(-)
 create mode 100644 net/nfc/hci/llc_nop.c

(limited to 'include')

diff --git a/include/net/nfc/llc.h b/include/net/nfc/llc.h
index 98df903f8b7d..146e053f91e7 100644
--- a/include/net/nfc/llc.h
+++ b/include/net/nfc/llc.h
@@ -24,6 +24,8 @@
 #include <net/nfc/hci.h>
 #include <linux/skbuff.h>
 
+#define LLC_NOP_NAME "nop"
+
 typedef void (*rcv_to_hci_t) (struct nfc_hci_dev *hdev, struct sk_buff *skb);
 typedef int (*xmit_to_drv_t) (struct nfc_hci_dev *hdev, struct sk_buff *skb);
 typedef void (*llc_failure_t) (struct nfc_hci_dev *hdev, int err);
diff --git a/net/nfc/hci/Makefile b/net/nfc/hci/Makefile
index b44686b581af..2ec4e5876f6b 100644
--- a/net/nfc/hci/Makefile
+++ b/net/nfc/hci/Makefile
@@ -4,5 +4,5 @@
 
 obj-$(CONFIG_NFC_HCI) += hci.o
 
-hci-y			:= core.o hcp.o command.o llc.o
+hci-y			:= core.o hcp.o command.o llc.o llc_nop.o
 hci-$(CONFIG_NFC_SHDLC)	+= shdlc.o
diff --git a/net/nfc/hci/llc.c b/net/nfc/hci/llc.c
index 73c42785ce84..32002e5339c0 100644
--- a/net/nfc/hci/llc.c
+++ b/net/nfc/hci/llc.c
@@ -28,7 +28,7 @@ int nfc_llc_init(void)
 {
 	INIT_LIST_HEAD(&llc_engines);
 
-	return 0;
+	return nfc_llc_nop_register();
 }
 EXPORT_SYMBOL(nfc_llc_init);
 
diff --git a/net/nfc/hci/llc.h b/net/nfc/hci/llc.h
index b2c7285b0309..acdd8d1bbae5 100644
--- a/net/nfc/hci/llc.h
+++ b/net/nfc/hci/llc.h
@@ -55,4 +55,6 @@ void *nfc_llc_get_data(struct nfc_llc *llc);
 int nfc_llc_register(const char *name, struct nfc_llc_ops *ops);
 void nfc_llc_unregister(const char *name);
 
+int nfc_llc_nop_register(void);
+
 #endif /* __LOCAL_LLC_H_ */
diff --git a/net/nfc/hci/llc_nop.c b/net/nfc/hci/llc_nop.c
new file mode 100644
index 000000000000..ec627cee12cd
--- /dev/null
+++ b/net/nfc/hci/llc_nop.c
@@ -0,0 +1,101 @@
+/*
+ * nop (passthrough) Link Layer Control
+ *
+ * Copyright (C) 2012  Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the
+ * Free Software Foundation, Inc.,
+ * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+#include <linux/types.h>
+#include <linux/export.h>
+
+#include "llc.h"
+
+struct llc_nop {
+	struct nfc_hci_dev *hdev;
+	xmit_to_drv_t xmit_to_drv;
+	rcv_to_hci_t rcv_to_hci;
+	int tx_headroom;
+	int tx_tailroom;
+	llc_failure_t llc_failure;
+};
+
+static void *llc_nop_init(struct nfc_hci_dev *hdev, xmit_to_drv_t xmit_to_drv,
+			  rcv_to_hci_t rcv_to_hci, int tx_headroom,
+			  int tx_tailroom, int *rx_headroom, int *rx_tailroom,
+			  llc_failure_t llc_failure)
+{
+	struct llc_nop *llc_nop;
+
+	*rx_headroom = 0;
+	*rx_tailroom = 0;
+
+	llc_nop = kzalloc(sizeof(struct llc_nop), GFP_KERNEL);
+	if (llc_nop == NULL)
+		return NULL;
+
+	llc_nop->hdev = hdev;
+	llc_nop->xmit_to_drv = xmit_to_drv;
+	llc_nop->rcv_to_hci = rcv_to_hci;
+	llc_nop->tx_headroom = tx_headroom;
+	llc_nop->tx_tailroom = tx_tailroom;
+	llc_nop->llc_failure = llc_failure;
+
+	return llc_nop;
+}
+
+static void llc_nop_deinit(struct nfc_llc *llc)
+{
+	kfree(nfc_llc_get_data(llc));
+}
+
+static int llc_nop_start(struct nfc_llc *llc)
+{
+	return 0;
+}
+
+static int llc_nop_stop(struct nfc_llc *llc)
+{
+	return 0;
+}
+
+static void llc_nop_rcv_from_drv(struct nfc_llc *llc, struct sk_buff *skb)
+{
+	struct llc_nop *llc_nop = nfc_llc_get_data(llc);
+
+	llc_nop->rcv_to_hci(llc_nop->hdev, skb);
+}
+
+static int llc_nop_xmit_from_hci(struct nfc_llc *llc, struct sk_buff *skb)
+{
+	struct llc_nop *llc_nop = nfc_llc_get_data(llc);
+
+	return llc_nop->xmit_to_drv(llc_nop->hdev, skb);
+}
+
+static struct nfc_llc_ops llc_nop_ops = {
+	.init = llc_nop_init,
+	.deinit = llc_nop_deinit,
+	.start = llc_nop_start,
+	.stop = llc_nop_stop,
+	.rcv_from_drv = llc_nop_rcv_from_drv,
+	.xmit_from_hci = llc_nop_xmit_from_hci,
+};
+
+int nfc_llc_nop_register()
+{
+	return nfc_llc_register(LLC_NOP_NAME, &llc_nop_ops);
+}
+EXPORT_SYMBOL(nfc_llc_nop_register);
-- 
cgit v1.2.3


From 4a61cd6687fc6348d08724676d34e38160d6cf9b Mon Sep 17 00:00:00 2001
From: Eric Lapuyade <eric.lapuyade@linux.intel.com>
Date: Thu, 13 Sep 2012 17:11:37 +0200
Subject: NFC: Add an shdlc llc module to llc core

This is used by HCI drivers such as the one for the pn544 which require
communications between HCI and the chip to use shdlc.

Signed-off-by: Eric Lapuyade <eric.lapuyade@intel.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 include/net/nfc/llc.h   |   1 +
 net/nfc/hci/Makefile    |   2 +-
 net/nfc/hci/llc.c       |  16 +-
 net/nfc/hci/llc.h       |   1 +
 net/nfc/hci/llc_shdlc.c | 834 ++++++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 852 insertions(+), 2 deletions(-)
 create mode 100644 net/nfc/hci/llc_shdlc.c

(limited to 'include')

diff --git a/include/net/nfc/llc.h b/include/net/nfc/llc.h
index 146e053f91e7..400ab7ae749d 100644
--- a/include/net/nfc/llc.h
+++ b/include/net/nfc/llc.h
@@ -25,6 +25,7 @@
 #include <linux/skbuff.h>
 
 #define LLC_NOP_NAME "nop"
+#define LLC_SHDLC_NAME "shdlc"
 
 typedef void (*rcv_to_hci_t) (struct nfc_hci_dev *hdev, struct sk_buff *skb);
 typedef int (*xmit_to_drv_t) (struct nfc_hci_dev *hdev, struct sk_buff *skb);
diff --git a/net/nfc/hci/Makefile b/net/nfc/hci/Makefile
index 2ec4e5876f6b..c4d65479629b 100644
--- a/net/nfc/hci/Makefile
+++ b/net/nfc/hci/Makefile
@@ -5,4 +5,4 @@
 obj-$(CONFIG_NFC_HCI) += hci.o
 
 hci-y			:= core.o hcp.o command.o llc.o llc_nop.o
-hci-$(CONFIG_NFC_SHDLC)	+= shdlc.o
+hci-$(CONFIG_NFC_SHDLC) += shdlc.o llc_shdlc.o
diff --git a/net/nfc/hci/llc.c b/net/nfc/hci/llc.c
index 32002e5339c0..bd11b0f7658a 100644
--- a/net/nfc/hci/llc.c
+++ b/net/nfc/hci/llc.c
@@ -26,9 +26,23 @@ static struct list_head llc_engines;
 
 int nfc_llc_init(void)
 {
+	int r;
+
 	INIT_LIST_HEAD(&llc_engines);
 
-	return nfc_llc_nop_register();
+	r = nfc_llc_nop_register();
+	if (r)
+		goto exit;
+
+	r = nfc_llc_shdlc_register();
+	if (r)
+		goto exit;
+
+	return 0;
+
+exit:
+	nfc_llc_exit();
+	return r;
 }
 EXPORT_SYMBOL(nfc_llc_init);
 
diff --git a/net/nfc/hci/llc.h b/net/nfc/hci/llc.h
index acdd8d1bbae5..c7014fdfc8c9 100644
--- a/net/nfc/hci/llc.h
+++ b/net/nfc/hci/llc.h
@@ -56,5 +56,6 @@ int nfc_llc_register(const char *name, struct nfc_llc_ops *ops);
 void nfc_llc_unregister(const char *name);
 
 int nfc_llc_nop_register(void);
+int nfc_llc_shdlc_register(void);
 
 #endif /* __LOCAL_LLC_H_ */
diff --git a/net/nfc/hci/llc_shdlc.c b/net/nfc/hci/llc_shdlc.c
new file mode 100644
index 000000000000..bb191100ee96
--- /dev/null
+++ b/net/nfc/hci/llc_shdlc.c
@@ -0,0 +1,834 @@
+/*
+ * shdlc Link Layer Control
+ *
+ * Copyright (C) 2012  Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the
+ * Free Software Foundation, Inc.,
+ * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+#define pr_fmt(fmt) "shdlc: %s: " fmt, __func__
+
+#include <linux/types.h>
+#include <linux/sched.h>
+#include <linux/export.h>
+#include <linux/wait.h>
+#include <linux/slab.h>
+#include <linux/skbuff.h>
+
+#include "llc.h"
+
+enum shdlc_state {
+	SHDLC_DISCONNECTED = 0,
+	SHDLC_CONNECTING = 1,
+	SHDLC_NEGOCIATING = 2,
+	SHDLC_CONNECTED = 3
+};
+
+struct llc_shdlc {
+	struct nfc_hci_dev *hdev;
+	xmit_to_drv_t xmit_to_drv;
+	rcv_to_hci_t rcv_to_hci;
+
+	struct mutex state_mutex;
+	enum shdlc_state state;
+	int hard_fault;
+
+	wait_queue_head_t *connect_wq;
+	int connect_tries;
+	int connect_result;
+	struct timer_list connect_timer;/* aka T3 in spec 10.6.1 */
+
+	u8 w;				/* window size */
+	bool srej_support;
+
+	struct timer_list t1_timer;	/* send ack timeout */
+	bool t1_active;
+
+	struct timer_list t2_timer;	/* guard/retransmit timeout */
+	bool t2_active;
+
+	int ns;				/* next seq num for send */
+	int nr;				/* next expected seq num for receive */
+	int dnr;			/* oldest sent unacked seq num */
+
+	struct sk_buff_head rcv_q;
+
+	struct sk_buff_head send_q;
+	bool rnr;			/* other side is not ready to receive */
+
+	struct sk_buff_head ack_pending_q;
+
+	struct work_struct sm_work;
+
+	int tx_headroom;
+	int tx_tailroom;
+
+	llc_failure_t llc_failure;
+};
+
+#define SHDLC_LLC_HEAD_ROOM	2
+
+#define SHDLC_MAX_WINDOW	4
+#define SHDLC_SREJ_SUPPORT	false
+
+#define SHDLC_CONTROL_HEAD_MASK	0xe0
+#define SHDLC_CONTROL_HEAD_I	0x80
+#define SHDLC_CONTROL_HEAD_I2	0xa0
+#define SHDLC_CONTROL_HEAD_S	0xc0
+#define SHDLC_CONTROL_HEAD_U	0xe0
+
+#define SHDLC_CONTROL_NS_MASK	0x38
+#define SHDLC_CONTROL_NR_MASK	0x07
+#define SHDLC_CONTROL_TYPE_MASK	0x18
+
+#define SHDLC_CONTROL_M_MASK	0x1f
+
+enum sframe_type {
+	S_FRAME_RR = 0x00,
+	S_FRAME_REJ = 0x01,
+	S_FRAME_RNR = 0x02,
+	S_FRAME_SREJ = 0x03
+};
+
+enum uframe_modifier {
+	U_FRAME_UA = 0x06,
+	U_FRAME_RSET = 0x19
+};
+
+#define SHDLC_CONNECT_VALUE_MS	5
+#define SHDLC_T1_VALUE_MS(w)	((5 * w) / 4)
+#define SHDLC_T2_VALUE_MS	300
+
+#define SHDLC_DUMP_SKB(info, skb)				  \
+do {								  \
+	pr_debug("%s:\n", info);				  \
+	print_hex_dump(KERN_DEBUG, "shdlc: ", DUMP_PREFIX_OFFSET, \
+		       16, 1, skb->data, skb->len, 0);		  \
+} while (0)
+
+/* checks x < y <= z modulo 8 */
+static bool llc_shdlc_x_lt_y_lteq_z(int x, int y, int z)
+{
+	if (x < z)
+		return ((x < y) && (y <= z)) ? true : false;
+	else
+		return ((y > x) || (y <= z)) ? true : false;
+}
+
+/* checks x <= y < z modulo 8 */
+static bool llc_shdlc_x_lteq_y_lt_z(int x, int y, int z)
+{
+	if (x <= z)
+		return ((x <= y) && (y < z)) ? true : false;
+	else			/* x > z -> z+8 > x */
+		return ((y >= x) || (y < z)) ? true : false;
+}
+
+static struct sk_buff *llc_shdlc_alloc_skb(struct llc_shdlc *shdlc,
+					   int payload_len)
+{
+	struct sk_buff *skb;
+
+	skb = alloc_skb(shdlc->tx_headroom + SHDLC_LLC_HEAD_ROOM +
+			shdlc->tx_tailroom + payload_len, GFP_KERNEL);
+	if (skb)
+		skb_reserve(skb, shdlc->tx_headroom + SHDLC_LLC_HEAD_ROOM);
+
+	return skb;
+}
+
+/* immediately sends an S frame. */
+static int llc_shdlc_send_s_frame(struct llc_shdlc *shdlc,
+				  enum sframe_type sframe_type, int nr)
+{
+	int r;
+	struct sk_buff *skb;
+
+	pr_debug("sframe_type=%d nr=%d\n", sframe_type, nr);
+
+	skb = llc_shdlc_alloc_skb(shdlc, 0);
+	if (skb == NULL)
+		return -ENOMEM;
+
+	*skb_push(skb, 1) = SHDLC_CONTROL_HEAD_S | (sframe_type << 3) | nr;
+
+	r = shdlc->xmit_to_drv(shdlc->hdev, skb);
+
+	kfree_skb(skb);
+
+	return r;
+}
+
+/* immediately sends an U frame. skb may contain optional payload */
+static int llc_shdlc_send_u_frame(struct llc_shdlc *shdlc,
+				  struct sk_buff *skb,
+				  enum uframe_modifier uframe_modifier)
+{
+	int r;
+
+	pr_debug("uframe_modifier=%d\n", uframe_modifier);
+
+	*skb_push(skb, 1) = SHDLC_CONTROL_HEAD_U | uframe_modifier;
+
+	r = shdlc->xmit_to_drv(shdlc->hdev, skb);
+
+	kfree_skb(skb);
+
+	return r;
+}
+
+/*
+ * Free ack_pending frames until y_nr - 1, and reset t2 according to
+ * the remaining oldest ack_pending frame sent time
+ */
+static void llc_shdlc_reset_t2(struct llc_shdlc *shdlc, int y_nr)
+{
+	struct sk_buff *skb;
+	int dnr = shdlc->dnr;	/* MUST initially be < y_nr */
+
+	pr_debug("release ack pending up to frame %d excluded\n", y_nr);
+
+	while (dnr != y_nr) {
+		pr_debug("release ack pending frame %d\n", dnr);
+
+		skb = skb_dequeue(&shdlc->ack_pending_q);
+		kfree_skb(skb);
+
+		dnr = (dnr + 1) % 8;
+	}
+
+	if (skb_queue_empty(&shdlc->ack_pending_q)) {
+		if (shdlc->t2_active) {
+			del_timer_sync(&shdlc->t2_timer);
+			shdlc->t2_active = false;
+
+			pr_debug
+			    ("All sent frames acked. Stopped T2(retransmit)\n");
+		}
+	} else {
+		skb = skb_peek(&shdlc->ack_pending_q);
+
+		mod_timer(&shdlc->t2_timer, *(unsigned long *)skb->cb +
+			  msecs_to_jiffies(SHDLC_T2_VALUE_MS));
+		shdlc->t2_active = true;
+
+		pr_debug
+		    ("Start T2(retransmit) for remaining unacked sent frames\n");
+	}
+}
+
+/*
+ * Receive validated frames from lower layer. skb contains HCI payload only.
+ * Handle according to algorithm at spec:10.8.2
+ */
+static void llc_shdlc_rcv_i_frame(struct llc_shdlc *shdlc,
+				  struct sk_buff *skb, int ns, int nr)
+{
+	int x_ns = ns;
+	int y_nr = nr;
+
+	pr_debug("recvd I-frame %d, remote waiting frame %d\n", ns, nr);
+
+	if (shdlc->state != SHDLC_CONNECTED)
+		goto exit;
+
+	if (x_ns != shdlc->nr) {
+		llc_shdlc_send_s_frame(shdlc, S_FRAME_REJ, shdlc->nr);
+		goto exit;
+	}
+
+	if (shdlc->t1_active == false) {
+		shdlc->t1_active = true;
+		mod_timer(&shdlc->t1_timer, jiffies +
+			  msecs_to_jiffies(SHDLC_T1_VALUE_MS(shdlc->w)));
+		pr_debug("(re)Start T1(send ack)\n");
+	}
+
+	if (skb->len) {
+		shdlc->rcv_to_hci(shdlc->hdev, skb);
+		skb = NULL;
+	}
+
+	shdlc->nr = (shdlc->nr + 1) % 8;
+
+	if (llc_shdlc_x_lt_y_lteq_z(shdlc->dnr, y_nr, shdlc->ns)) {
+		llc_shdlc_reset_t2(shdlc, y_nr);
+
+		shdlc->dnr = y_nr;
+	}
+
+exit:
+	kfree_skb(skb);
+}
+
+static void llc_shdlc_rcv_ack(struct llc_shdlc *shdlc, int y_nr)
+{
+	pr_debug("remote acked up to frame %d excluded\n", y_nr);
+
+	if (llc_shdlc_x_lt_y_lteq_z(shdlc->dnr, y_nr, shdlc->ns)) {
+		llc_shdlc_reset_t2(shdlc, y_nr);
+		shdlc->dnr = y_nr;
+	}
+}
+
+static void llc_shdlc_requeue_ack_pending(struct llc_shdlc *shdlc)
+{
+	struct sk_buff *skb;
+
+	pr_debug("ns reset to %d\n", shdlc->dnr);
+
+	while ((skb = skb_dequeue_tail(&shdlc->ack_pending_q))) {
+		skb_pull(skb, 1);	/* remove control field */
+		skb_queue_head(&shdlc->send_q, skb);
+	}
+	shdlc->ns = shdlc->dnr;
+}
+
+static void llc_shdlc_rcv_rej(struct llc_shdlc *shdlc, int y_nr)
+{
+	struct sk_buff *skb;
+
+	pr_debug("remote asks retransmition from frame %d\n", y_nr);
+
+	if (llc_shdlc_x_lteq_y_lt_z(shdlc->dnr, y_nr, shdlc->ns)) {
+		if (shdlc->t2_active) {
+			del_timer_sync(&shdlc->t2_timer);
+			shdlc->t2_active = false;
+			pr_debug("Stopped T2(retransmit)\n");
+		}
+
+		if (shdlc->dnr != y_nr) {
+			while ((shdlc->dnr = ((shdlc->dnr + 1) % 8)) != y_nr) {
+				skb = skb_dequeue(&shdlc->ack_pending_q);
+				kfree_skb(skb);
+			}
+		}
+
+		llc_shdlc_requeue_ack_pending(shdlc);
+	}
+}
+
+/* See spec RR:10.8.3 REJ:10.8.4 */
+static void llc_shdlc_rcv_s_frame(struct llc_shdlc *shdlc,
+				  enum sframe_type s_frame_type, int nr)
+{
+	struct sk_buff *skb;
+
+	if (shdlc->state != SHDLC_CONNECTED)
+		return;
+
+	switch (s_frame_type) {
+	case S_FRAME_RR:
+		llc_shdlc_rcv_ack(shdlc, nr);
+		if (shdlc->rnr == true) {	/* see SHDLC 10.7.7 */
+			shdlc->rnr = false;
+			if (shdlc->send_q.qlen == 0) {
+				skb = llc_shdlc_alloc_skb(shdlc, 0);
+				if (skb)
+					skb_queue_tail(&shdlc->send_q, skb);
+			}
+		}
+		break;
+	case S_FRAME_REJ:
+		llc_shdlc_rcv_rej(shdlc, nr);
+		break;
+	case S_FRAME_RNR:
+		llc_shdlc_rcv_ack(shdlc, nr);
+		shdlc->rnr = true;
+		break;
+	default:
+		break;
+	}
+}
+
+static void llc_shdlc_connect_complete(struct llc_shdlc *shdlc, int r)
+{
+	pr_debug("result=%d\n", r);
+
+	del_timer_sync(&shdlc->connect_timer);
+
+	if (r == 0) {
+		shdlc->ns = 0;
+		shdlc->nr = 0;
+		shdlc->dnr = 0;
+
+		shdlc->state = SHDLC_CONNECTED;
+	} else {
+		shdlc->state = SHDLC_DISCONNECTED;
+	}
+
+	shdlc->connect_result = r;
+
+	wake_up(shdlc->connect_wq);
+}
+
+static int llc_shdlc_connect_initiate(struct llc_shdlc *shdlc)
+{
+	struct sk_buff *skb;
+
+	pr_debug("\n");
+
+	skb = llc_shdlc_alloc_skb(shdlc, 2);
+	if (skb == NULL)
+		return -ENOMEM;
+
+	*skb_put(skb, 1) = SHDLC_MAX_WINDOW;
+	*skb_put(skb, 1) = SHDLC_SREJ_SUPPORT ? 1 : 0;
+
+	return llc_shdlc_send_u_frame(shdlc, skb, U_FRAME_RSET);
+}
+
+static int llc_shdlc_connect_send_ua(struct llc_shdlc *shdlc)
+{
+	struct sk_buff *skb;
+
+	pr_debug("\n");
+
+	skb = llc_shdlc_alloc_skb(shdlc, 0);
+	if (skb == NULL)
+		return -ENOMEM;
+
+	return llc_shdlc_send_u_frame(shdlc, skb, U_FRAME_UA);
+}
+
+static void llc_shdlc_rcv_u_frame(struct llc_shdlc *shdlc,
+				  struct sk_buff *skb,
+				  enum uframe_modifier u_frame_modifier)
+{
+	u8 w = SHDLC_MAX_WINDOW;
+	bool srej_support = SHDLC_SREJ_SUPPORT;
+	int r;
+
+	pr_debug("u_frame_modifier=%d\n", u_frame_modifier);
+
+	switch (u_frame_modifier) {
+	case U_FRAME_RSET:
+		if (shdlc->state == SHDLC_NEGOCIATING) {
+			/* we sent RSET, but chip wants to negociate */
+			if (skb->len > 0)
+				w = skb->data[0];
+
+			if (skb->len > 1)
+				srej_support = skb->data[1] & 0x01 ? true :
+					       false;
+
+			if ((w <= SHDLC_MAX_WINDOW) &&
+			    (SHDLC_SREJ_SUPPORT || (srej_support == false))) {
+				shdlc->w = w;
+				shdlc->srej_support = srej_support;
+				r = llc_shdlc_connect_send_ua(shdlc);
+				llc_shdlc_connect_complete(shdlc, r);
+			}
+		} else if (shdlc->state == SHDLC_CONNECTED) {
+			/*
+			 * Chip wants to reset link. This is unexpected and
+			 * unsupported.
+			 */
+			shdlc->hard_fault = -ECONNRESET;
+		}
+		break;
+	case U_FRAME_UA:
+		if ((shdlc->state == SHDLC_CONNECTING &&
+		     shdlc->connect_tries > 0) ||
+		    (shdlc->state == SHDLC_NEGOCIATING))
+			llc_shdlc_connect_complete(shdlc, 0);
+		break;
+	default:
+		break;
+	}
+
+	kfree_skb(skb);
+}
+
+static void llc_shdlc_handle_rcv_queue(struct llc_shdlc *shdlc)
+{
+	struct sk_buff *skb;
+	u8 control;
+	int nr;
+	int ns;
+	enum sframe_type s_frame_type;
+	enum uframe_modifier u_frame_modifier;
+
+	if (shdlc->rcv_q.qlen)
+		pr_debug("rcvQlen=%d\n", shdlc->rcv_q.qlen);
+
+	while ((skb = skb_dequeue(&shdlc->rcv_q)) != NULL) {
+		control = skb->data[0];
+		skb_pull(skb, 1);
+		switch (control & SHDLC_CONTROL_HEAD_MASK) {
+		case SHDLC_CONTROL_HEAD_I:
+		case SHDLC_CONTROL_HEAD_I2:
+			ns = (control & SHDLC_CONTROL_NS_MASK) >> 3;
+			nr = control & SHDLC_CONTROL_NR_MASK;
+			llc_shdlc_rcv_i_frame(shdlc, skb, ns, nr);
+			break;
+		case SHDLC_CONTROL_HEAD_S:
+			s_frame_type = (control & SHDLC_CONTROL_TYPE_MASK) >> 3;
+			nr = control & SHDLC_CONTROL_NR_MASK;
+			llc_shdlc_rcv_s_frame(shdlc, s_frame_type, nr);
+			kfree_skb(skb);
+			break;
+		case SHDLC_CONTROL_HEAD_U:
+			u_frame_modifier = control & SHDLC_CONTROL_M_MASK;
+			llc_shdlc_rcv_u_frame(shdlc, skb, u_frame_modifier);
+			break;
+		default:
+			pr_err("UNKNOWN Control=%d\n", control);
+			kfree_skb(skb);
+			break;
+		}
+	}
+}
+
+static int llc_shdlc_w_used(int ns, int dnr)
+{
+	int unack_count;
+
+	if (dnr <= ns)
+		unack_count = ns - dnr;
+	else
+		unack_count = 8 - dnr + ns;
+
+	return unack_count;
+}
+
+/* Send frames according to algorithm at spec:10.8.1 */
+static void llc_shdlc_handle_send_queue(struct llc_shdlc *shdlc)
+{
+	struct sk_buff *skb;
+	int r;
+	unsigned long time_sent;
+
+	if (shdlc->send_q.qlen)
+		pr_debug
+		    ("sendQlen=%d ns=%d dnr=%d rnr=%s w_room=%d unackQlen=%d\n",
+		     shdlc->send_q.qlen, shdlc->ns, shdlc->dnr,
+		     shdlc->rnr == false ? "false" : "true",
+		     shdlc->w - llc_shdlc_w_used(shdlc->ns, shdlc->dnr),
+		     shdlc->ack_pending_q.qlen);
+
+	while (shdlc->send_q.qlen && shdlc->ack_pending_q.qlen < shdlc->w &&
+	       (shdlc->rnr == false)) {
+
+		if (shdlc->t1_active) {
+			del_timer_sync(&shdlc->t1_timer);
+			shdlc->t1_active = false;
+			pr_debug("Stopped T1(send ack)\n");
+		}
+
+		skb = skb_dequeue(&shdlc->send_q);
+
+		*skb_push(skb, 1) = SHDLC_CONTROL_HEAD_I | (shdlc->ns << 3) |
+				    shdlc->nr;
+
+		pr_debug("Sending I-Frame %d, waiting to rcv %d\n", shdlc->ns,
+			 shdlc->nr);
+	/*	SHDLC_DUMP_SKB("shdlc frame written", skb); */
+
+		r = shdlc->xmit_to_drv(shdlc->hdev, skb);
+		if (r < 0) {
+			shdlc->hard_fault = r;
+			break;
+		}
+
+		shdlc->ns = (shdlc->ns + 1) % 8;
+
+		time_sent = jiffies;
+		*(unsigned long *)skb->cb = time_sent;
+
+		skb_queue_tail(&shdlc->ack_pending_q, skb);
+
+		if (shdlc->t2_active == false) {
+			shdlc->t2_active = true;
+			mod_timer(&shdlc->t2_timer, time_sent +
+				  msecs_to_jiffies(SHDLC_T2_VALUE_MS));
+			pr_debug("Started T2 (retransmit)\n");
+		}
+	}
+}
+
+static void llc_shdlc_connect_timeout(unsigned long data)
+{
+	struct llc_shdlc *shdlc = (struct llc_shdlc *)data;
+
+	pr_debug("\n");
+
+	queue_work(system_nrt_wq, &shdlc->sm_work);
+}
+
+static void llc_shdlc_t1_timeout(unsigned long data)
+{
+	struct llc_shdlc *shdlc = (struct llc_shdlc *)data;
+
+	pr_debug("SoftIRQ: need to send ack\n");
+
+	queue_work(system_nrt_wq, &shdlc->sm_work);
+}
+
+static void llc_shdlc_t2_timeout(unsigned long data)
+{
+	struct llc_shdlc *shdlc = (struct llc_shdlc *)data;
+
+	pr_debug("SoftIRQ: need to retransmit\n");
+
+	queue_work(system_nrt_wq, &shdlc->sm_work);
+}
+
+static void llc_shdlc_sm_work(struct work_struct *work)
+{
+	struct llc_shdlc *shdlc = container_of(work, struct llc_shdlc, sm_work);
+	int r;
+
+	pr_debug("\n");
+
+	mutex_lock(&shdlc->state_mutex);
+
+	switch (shdlc->state) {
+	case SHDLC_DISCONNECTED:
+		skb_queue_purge(&shdlc->rcv_q);
+		skb_queue_purge(&shdlc->send_q);
+		skb_queue_purge(&shdlc->ack_pending_q);
+		break;
+	case SHDLC_CONNECTING:
+		if (shdlc->hard_fault) {
+			llc_shdlc_connect_complete(shdlc, shdlc->hard_fault);
+			break;
+		}
+
+		if (shdlc->connect_tries++ < 5)
+			r = llc_shdlc_connect_initiate(shdlc);
+		else
+			r = -ETIME;
+		if (r < 0)
+			llc_shdlc_connect_complete(shdlc, r);
+		else {
+			mod_timer(&shdlc->connect_timer, jiffies +
+				  msecs_to_jiffies(SHDLC_CONNECT_VALUE_MS));
+
+			shdlc->state = SHDLC_NEGOCIATING;
+		}
+		break;
+	case SHDLC_NEGOCIATING:
+		if (timer_pending(&shdlc->connect_timer) == 0) {
+			shdlc->state = SHDLC_CONNECTING;
+			queue_work(system_nrt_wq, &shdlc->sm_work);
+		}
+
+		llc_shdlc_handle_rcv_queue(shdlc);
+
+		if (shdlc->hard_fault) {
+			llc_shdlc_connect_complete(shdlc, shdlc->hard_fault);
+			break;
+		}
+		break;
+	case SHDLC_CONNECTED:
+		llc_shdlc_handle_rcv_queue(shdlc);
+		llc_shdlc_handle_send_queue(shdlc);
+
+		if (shdlc->t1_active && timer_pending(&shdlc->t1_timer) == 0) {
+			pr_debug
+			    ("Handle T1(send ack) elapsed (T1 now inactive)\n");
+
+			shdlc->t1_active = false;
+			r = llc_shdlc_send_s_frame(shdlc, S_FRAME_RR,
+						   shdlc->nr);
+			if (r < 0)
+				shdlc->hard_fault = r;
+		}
+
+		if (shdlc->t2_active && timer_pending(&shdlc->t2_timer) == 0) {
+			pr_debug
+			    ("Handle T2(retransmit) elapsed (T2 inactive)\n");
+
+			shdlc->t2_active = false;
+
+			llc_shdlc_requeue_ack_pending(shdlc);
+			llc_shdlc_handle_send_queue(shdlc);
+		}
+
+		if (shdlc->hard_fault) {
+			shdlc->llc_failure(shdlc->hdev, shdlc->hard_fault);
+		}
+		break;
+	default:
+		break;
+	}
+	mutex_unlock(&shdlc->state_mutex);
+}
+
+/*
+ * Called from syscall context to establish shdlc link. Sleeps until
+ * link is ready or failure.
+ */
+static int llc_shdlc_connect(struct llc_shdlc *shdlc)
+{
+	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(connect_wq);
+
+	pr_debug("\n");
+
+	mutex_lock(&shdlc->state_mutex);
+
+	shdlc->state = SHDLC_CONNECTING;
+	shdlc->connect_wq = &connect_wq;
+	shdlc->connect_tries = 0;
+	shdlc->connect_result = 1;
+
+	mutex_unlock(&shdlc->state_mutex);
+
+	queue_work(system_nrt_wq, &shdlc->sm_work);
+
+	wait_event(connect_wq, shdlc->connect_result != 1);
+
+	return shdlc->connect_result;
+}
+
+static void llc_shdlc_disconnect(struct llc_shdlc *shdlc)
+{
+	pr_debug("\n");
+
+	mutex_lock(&shdlc->state_mutex);
+
+	shdlc->state = SHDLC_DISCONNECTED;
+
+	mutex_unlock(&shdlc->state_mutex);
+
+	queue_work(system_nrt_wq, &shdlc->sm_work);
+}
+
+/*
+ * Receive an incoming shdlc frame. Frame has already been crc-validated.
+ * skb contains only LLC header and payload.
+ * If skb == NULL, it is a notification that the link below is dead.
+ */
+static void llc_shdlc_recv_frame(struct llc_shdlc *shdlc, struct sk_buff *skb)
+{
+	if (skb == NULL) {
+		pr_err("NULL Frame -> link is dead\n");
+		shdlc->hard_fault = -EREMOTEIO;
+	} else {
+		SHDLC_DUMP_SKB("incoming frame", skb);
+		skb_queue_tail(&shdlc->rcv_q, skb);
+	}
+
+	queue_work(system_nrt_wq, &shdlc->sm_work);
+}
+
+static void *llc_shdlc_init(struct nfc_hci_dev *hdev, xmit_to_drv_t xmit_to_drv,
+			    rcv_to_hci_t rcv_to_hci, int tx_headroom,
+			    int tx_tailroom, int *rx_headroom, int *rx_tailroom,
+			    llc_failure_t llc_failure)
+{
+	struct llc_shdlc *shdlc;
+
+	*rx_headroom = SHDLC_LLC_HEAD_ROOM;
+	*rx_tailroom = 0;
+
+	shdlc = kzalloc(sizeof(struct llc_shdlc), GFP_KERNEL);
+	if (shdlc == NULL)
+		return NULL;
+
+	mutex_init(&shdlc->state_mutex);
+	shdlc->state = SHDLC_DISCONNECTED;
+
+	init_timer(&shdlc->connect_timer);
+	shdlc->connect_timer.data = (unsigned long)shdlc;
+	shdlc->connect_timer.function = llc_shdlc_connect_timeout;
+
+	init_timer(&shdlc->t1_timer);
+	shdlc->t1_timer.data = (unsigned long)shdlc;
+	shdlc->t1_timer.function = llc_shdlc_t1_timeout;
+
+	init_timer(&shdlc->t2_timer);
+	shdlc->t2_timer.data = (unsigned long)shdlc;
+	shdlc->t2_timer.function = llc_shdlc_t2_timeout;
+
+	shdlc->w = SHDLC_MAX_WINDOW;
+	shdlc->srej_support = SHDLC_SREJ_SUPPORT;
+
+	skb_queue_head_init(&shdlc->rcv_q);
+	skb_queue_head_init(&shdlc->send_q);
+	skb_queue_head_init(&shdlc->ack_pending_q);
+
+	INIT_WORK(&shdlc->sm_work, llc_shdlc_sm_work);
+
+	shdlc->hdev = hdev;
+	shdlc->xmit_to_drv = xmit_to_drv;
+	shdlc->rcv_to_hci = rcv_to_hci;
+	shdlc->tx_headroom = tx_headroom;
+	shdlc->tx_tailroom = tx_tailroom;
+	shdlc->llc_failure = llc_failure;
+
+	return shdlc;
+}
+
+static void llc_shdlc_deinit(struct nfc_llc *llc)
+{
+	struct llc_shdlc *shdlc = nfc_llc_get_data(llc);
+
+	skb_queue_purge(&shdlc->rcv_q);
+	skb_queue_purge(&shdlc->send_q);
+	skb_queue_purge(&shdlc->ack_pending_q);
+
+	kfree(shdlc);
+}
+
+static int llc_shdlc_start(struct nfc_llc *llc)
+{
+	struct llc_shdlc *shdlc = nfc_llc_get_data(llc);
+
+	return llc_shdlc_connect(shdlc);
+}
+
+static int llc_shdlc_stop(struct nfc_llc *llc)
+{
+	struct llc_shdlc *shdlc = nfc_llc_get_data(llc);
+
+	llc_shdlc_disconnect(shdlc);
+
+	return 0;
+}
+
+static void llc_shdlc_rcv_from_drv(struct nfc_llc *llc, struct sk_buff *skb)
+{
+	struct llc_shdlc *shdlc = nfc_llc_get_data(llc);
+
+	llc_shdlc_recv_frame(shdlc, skb);
+}
+
+static int llc_shdlc_xmit_from_hci(struct nfc_llc *llc, struct sk_buff *skb)
+{
+	struct llc_shdlc *shdlc = nfc_llc_get_data(llc);
+
+	skb_queue_tail(&shdlc->send_q, skb);
+
+	queue_work(system_nrt_wq, &shdlc->sm_work);
+
+	return 0;
+}
+
+static struct nfc_llc_ops llc_shdlc_ops = {
+	.init = llc_shdlc_init,
+	.deinit = llc_shdlc_deinit,
+	.start = llc_shdlc_start,
+	.stop = llc_shdlc_stop,
+	.rcv_from_drv = llc_shdlc_rcv_from_drv,
+	.xmit_from_hci = llc_shdlc_xmit_from_hci,
+};
+
+int nfc_llc_shdlc_register()
+{
+	return nfc_llc_register(LLC_SHDLC_NAME, &llc_shdlc_ops);
+}
+EXPORT_SYMBOL(nfc_llc_shdlc_register);
-- 
cgit v1.2.3


From 412fda538f4b1317ecd0fbe6e5bc9124792bea88 Mon Sep 17 00:00:00 2001
From: Eric Lapuyade <eric.lapuyade@linux.intel.com>
Date: Tue, 18 Sep 2012 19:45:48 +0200
Subject: NFC: Changed HCI and PN544 HCI driver to use the new HCI LLC Core

The previous shdlc HCI driver and its header are removed from the tree.
PN544 now registers directly with HCI and passes the name of the llc it
requires (shdlc).
HCI instantiation now allocates the required llc instance. The llc is
started when the HCI device is brought up.

Signed-off-by: Eric Lapuyade <eric.lapuyade@intel.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/nfc/Kconfig     |   2 +-
 drivers/nfc/pn544_hci.c |  73 ++--
 include/net/nfc/hci.h   |   3 +
 include/net/nfc/shdlc.h | 106 ------
 net/nfc/hci/Makefile    |   2 +-
 net/nfc/hci/core.c      | 210 ++++++-----
 net/nfc/hci/llc_shdlc.c |   2 +-
 net/nfc/hci/shdlc.c     | 918 ------------------------------------------------
 8 files changed, 164 insertions(+), 1152 deletions(-)
 delete mode 100644 include/net/nfc/shdlc.h
 delete mode 100644 net/nfc/hci/shdlc.c

(limited to 'include')

diff --git a/drivers/nfc/Kconfig b/drivers/nfc/Kconfig
index 3b20b73ee649..89c57d1a7cad 100644
--- a/drivers/nfc/Kconfig
+++ b/drivers/nfc/Kconfig
@@ -19,7 +19,7 @@ config PN544_NFC
 
 config PN544_HCI_NFC
 	tristate "HCI PN544 NFC driver"
-	depends on I2C && NFC_SHDLC
+	depends on I2C && NFC_HCI && NFC_SHDLC
 	select CRC_CCITT
 	default n
 	---help---
diff --git a/drivers/nfc/pn544_hci.c b/drivers/nfc/pn544_hci.c
index a176d1f7bbf3..e681da2e3413 100644
--- a/drivers/nfc/pn544_hci.c
+++ b/drivers/nfc/pn544_hci.c
@@ -29,7 +29,7 @@
 
 #include <linux/nfc.h>
 #include <net/nfc/hci.h>
-#include <net/nfc/shdlc.h>
+#include <net/nfc/llc.h>
 
 #include <linux/nfc/pn544.h>
 
@@ -133,7 +133,7 @@ static struct nfc_hci_gate pn544_gates[] = {
 
 struct pn544_hci_info {
 	struct i2c_client *i2c_dev;
-	struct nfc_shdlc *shdlc;
+	struct nfc_hci_dev *hdev;
 
 	enum pn544_state state;
 
@@ -362,21 +362,21 @@ static irqreturn_t pn544_hci_irq_thread_fn(int irq, void *dev_id)
 	if (r == -EREMOTEIO) {
 		info->hard_fault = r;
 
-		nfc_shdlc_recv_frame(info->shdlc, NULL);
+		nfc_hci_recv_frame(info->hdev, NULL);
 
 		return IRQ_HANDLED;
 	} else if ((r == -ENOMEM) || (r == -EBADMSG)) {
 		return IRQ_HANDLED;
 	}
 
-	nfc_shdlc_recv_frame(info->shdlc, skb);
+	nfc_hci_recv_frame(info->hdev, skb);
 
 	return IRQ_HANDLED;
 }
 
-static int pn544_hci_open(struct nfc_shdlc *shdlc)
+static int pn544_hci_open(struct nfc_hci_dev *hdev)
 {
-	struct pn544_hci_info *info = nfc_shdlc_get_clientdata(shdlc);
+	struct pn544_hci_info *info = nfc_hci_get_clientdata(hdev);
 	int r = 0;
 
 	mutex_lock(&info->info_lock);
@@ -396,9 +396,9 @@ out:
 	return r;
 }
 
-static void pn544_hci_close(struct nfc_shdlc *shdlc)
+static void pn544_hci_close(struct nfc_hci_dev *hdev)
 {
-	struct pn544_hci_info *info = nfc_shdlc_get_clientdata(shdlc);
+	struct pn544_hci_info *info = nfc_hci_get_clientdata(hdev);
 
 	mutex_lock(&info->info_lock);
 
@@ -413,9 +413,8 @@ out:
 	mutex_unlock(&info->info_lock);
 }
 
-static int pn544_hci_ready(struct nfc_shdlc *shdlc)
+static int pn544_hci_ready(struct nfc_hci_dev *hdev)
 {
-	struct nfc_hci_dev *hdev = nfc_shdlc_get_hci_dev(shdlc);
 	struct sk_buff *skb;
 	static struct hw_config {
 		u8 adr[2];
@@ -601,9 +600,9 @@ static void pn544_hci_remove_len_crc(struct sk_buff *skb)
 	skb_trim(skb, PN544_FRAME_TAILROOM);
 }
 
-static int pn544_hci_xmit(struct nfc_shdlc *shdlc, struct sk_buff *skb)
+static int pn544_hci_xmit(struct nfc_hci_dev *hdev, struct sk_buff *skb)
 {
-	struct pn544_hci_info *info = nfc_shdlc_get_clientdata(shdlc);
+	struct pn544_hci_info *info = nfc_hci_get_clientdata(hdev);
 	struct i2c_client *client = info->i2c_dev;
 	int r;
 
@@ -617,10 +616,9 @@ static int pn544_hci_xmit(struct nfc_shdlc *shdlc, struct sk_buff *skb)
 	return r;
 }
 
-static int pn544_hci_start_poll(struct nfc_shdlc *shdlc,
+static int pn544_hci_start_poll(struct nfc_hci_dev *hdev,
 				u32 im_protocols, u32 tm_protocols)
 {
-	struct nfc_hci_dev *hdev = nfc_shdlc_get_hci_dev(shdlc);
 	u8 phases = 0;
 	int r;
 	u8 duration[2];
@@ -671,7 +669,7 @@ static int pn544_hci_start_poll(struct nfc_shdlc *shdlc,
 	return r;
 }
 
-static int pn544_hci_target_from_gate(struct nfc_shdlc *shdlc, u8 gate,
+static int pn544_hci_target_from_gate(struct nfc_hci_dev *hdev, u8 gate,
 				      struct nfc_target *target)
 {
 	switch (gate) {
@@ -689,11 +687,10 @@ static int pn544_hci_target_from_gate(struct nfc_shdlc *shdlc, u8 gate,
 	return 0;
 }
 
-static int pn544_hci_complete_target_discovered(struct nfc_shdlc *shdlc,
+static int pn544_hci_complete_target_discovered(struct nfc_hci_dev *hdev,
 						u8 gate,
 						struct nfc_target *target)
 {
-	struct nfc_hci_dev *hdev = nfc_shdlc_get_hci_dev(shdlc);
 	struct sk_buff *uid_skb;
 	int r = 0;
 
@@ -765,13 +762,12 @@ static void pn544_hci_data_exchange_cb(void *context, struct sk_buff *skb,
  * <= 0: driver handled the data exchange
  *    1: driver doesn't especially handle, please do standard processing
  */
-static int pn544_hci_data_exchange(struct nfc_shdlc *shdlc,
+static int pn544_hci_data_exchange(struct nfc_hci_dev *hdev,
 				   struct nfc_target *target,
 				   struct sk_buff *skb, data_exchange_cb_t cb,
 				   void *cb_context)
 {
-	struct pn544_hci_info *info = nfc_shdlc_get_clientdata(shdlc);
-	struct nfc_hci_dev *hdev = nfc_shdlc_get_hci_dev(shdlc);
+	struct pn544_hci_info *info = nfc_hci_get_clientdata(hdev);
 
 	pr_info(DRIVER_DESC ": %s for gate=%d\n", __func__,
 		target->hci_reader_gate);
@@ -824,17 +820,15 @@ static int pn544_hci_data_exchange(struct nfc_shdlc *shdlc,
 	}
 }
 
-static int pn544_hci_check_presence(struct nfc_shdlc *shdlc,
+static int pn544_hci_check_presence(struct nfc_hci_dev *hdev,
 				   struct nfc_target *target)
 {
-	struct nfc_hci_dev *hdev = nfc_shdlc_get_hci_dev(shdlc);
-
 	return nfc_hci_send_cmd(hdev, target->hci_reader_gate,
 				PN544_RF_READER_CMD_PRESENCE_CHECK,
 				NULL, 0, NULL);
 }
 
-static struct nfc_shdlc_ops pn544_shdlc_ops = {
+static struct nfc_hci_ops pn544_hci_ops = {
 	.open = pn544_hci_open,
 	.close = pn544_hci_close,
 	.hci_ready = pn544_hci_ready,
@@ -926,23 +920,30 @@ static int __devinit pn544_hci_probe(struct i2c_client *client,
 		    NFC_PROTO_ISO14443_B_MASK |
 		    NFC_PROTO_NFC_DEP_MASK;
 
-	info->shdlc = nfc_shdlc_allocate(&pn544_shdlc_ops,
-					 &init_data, protocols,
-					 PN544_FRAME_HEADROOM + PN544_CMDS_HEADROOM,
-					 PN544_FRAME_TAILROOM,
-					 PN544_HCI_LLC_MAX_PAYLOAD,
-					 dev_name(&client->dev));
-	if (!info->shdlc) {
-		dev_err(&client->dev, "Cannot allocate nfc shdlc.\n");
+	info->hdev = nfc_hci_allocate_device(&pn544_hci_ops, &init_data,
+					     protocols, LLC_SHDLC_NAME,
+					     PN544_FRAME_HEADROOM +
+					     PN544_CMDS_HEADROOM,
+					     PN544_FRAME_TAILROOM,
+					     PN544_HCI_LLC_MAX_PAYLOAD);
+	if (!info->hdev) {
+		dev_err(&client->dev, "Cannot allocate nfc hdev.\n");
 		r = -ENOMEM;
-		goto err_allocshdlc;
+		goto err_alloc_hdev;
 	}
 
-	nfc_shdlc_set_clientdata(info->shdlc, info);
+	nfc_hci_set_clientdata(info->hdev, info);
+
+	r = nfc_hci_register_device(info->hdev);
+	if (r)
+		goto err_regdev;
 
 	return 0;
 
-err_allocshdlc:
+err_regdev:
+	nfc_hci_free_device(info->hdev);
+
+err_alloc_hdev:
 	free_irq(client->irq, info);
 
 err_rti:
@@ -963,7 +964,7 @@ static __devexit int pn544_hci_remove(struct i2c_client *client)
 
 	dev_dbg(&client->dev, "%s\n", __func__);
 
-	nfc_shdlc_free(info->shdlc);
+	nfc_hci_free_device(info->hdev);
 
 	if (info->state != PN544_ST_COLD) {
 		if (pdata->disable)
diff --git a/include/net/nfc/hci.h b/include/net/nfc/hci.h
index 6cee6e21fc46..9b5ed2d94d60 100644
--- a/include/net/nfc/hci.h
+++ b/include/net/nfc/hci.h
@@ -87,6 +87,8 @@ struct nfc_hci_dev {
 
 	struct nfc_hci_ops *ops;
 
+	struct nfc_llc *llc;
+
 	struct nfc_hci_init_data init_data;
 
 	void *clientdata;
@@ -113,6 +115,7 @@ struct nfc_hci_dev {
 struct nfc_hci_dev *nfc_hci_allocate_device(struct nfc_hci_ops *ops,
 					    struct nfc_hci_init_data *init_data,
 					    u32 protocols,
+					    const char *llc_name,
 					    int tx_headroom,
 					    int tx_tailroom,
 					    int max_link_payload);
diff --git a/include/net/nfc/shdlc.h b/include/net/nfc/shdlc.h
deleted file mode 100644
index fe5e19829352..000000000000
--- a/include/net/nfc/shdlc.h
+++ /dev/null
@@ -1,106 +0,0 @@
-/*
- * Copyright (C) 2012  Intel Corporation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the
- * Free Software Foundation, Inc.,
- * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
- */
-
-#ifndef __NFC_SHDLC_H
-#define __NFC_SHDLC_H
-
-struct nfc_shdlc;
-
-struct nfc_shdlc_ops {
-	int (*open) (struct nfc_shdlc *shdlc);
-	void (*close) (struct nfc_shdlc *shdlc);
-	int (*hci_ready) (struct nfc_shdlc *shdlc);
-	int (*xmit) (struct nfc_shdlc *shdlc, struct sk_buff *skb);
-	int (*start_poll) (struct nfc_shdlc *shdlc,
-			   u32 im_protocols, u32 tm_protocols);
-	int (*target_from_gate) (struct nfc_shdlc *shdlc, u8 gate,
-				 struct nfc_target *target);
-	int (*complete_target_discovered) (struct nfc_shdlc *shdlc, u8 gate,
-					   struct nfc_target *target);
-	int (*data_exchange) (struct nfc_shdlc *shdlc,
-			      struct nfc_target *target, struct sk_buff *skb,
-			      data_exchange_cb_t cb, void *cb_context);
-	int (*check_presence)(struct nfc_shdlc *shdlc,
-			      struct nfc_target *target);
-};
-
-enum shdlc_state {
-	SHDLC_DISCONNECTED = 0,
-	SHDLC_CONNECTING = 1,
-	SHDLC_NEGOCIATING = 2,
-	SHDLC_CONNECTED = 3
-};
-
-struct nfc_shdlc {
-	struct mutex state_mutex;
-	enum shdlc_state state;
-	int hard_fault;
-
-	struct nfc_hci_dev *hdev;
-
-	wait_queue_head_t *connect_wq;
-	int connect_tries;
-	int connect_result;
-	struct timer_list connect_timer;/* aka T3 in spec 10.6.1 */
-
-	u8 w;				/* window size */
-	bool srej_support;
-
-	struct timer_list t1_timer;	/* send ack timeout */
-	bool t1_active;
-
-	struct timer_list t2_timer;	/* guard/retransmit timeout */
-	bool t2_active;
-
-	int ns;				/* next seq num for send */
-	int nr;				/* next expected seq num for receive */
-	int dnr;			/* oldest sent unacked seq num */
-
-	struct sk_buff_head rcv_q;
-
-	struct sk_buff_head send_q;
-	bool rnr;			/* other side is not ready to receive */
-
-	struct sk_buff_head ack_pending_q;
-
-	struct work_struct sm_work;
-
-	struct nfc_shdlc_ops *ops;
-
-	int client_headroom;
-	int client_tailroom;
-
-	void *clientdata;
-};
-
-void nfc_shdlc_recv_frame(struct nfc_shdlc *shdlc, struct sk_buff *skb);
-
-struct nfc_shdlc *nfc_shdlc_allocate(struct nfc_shdlc_ops *ops,
-				     struct nfc_hci_init_data *init_data,
-				     u32 protocols,
-				     int tx_headroom, int tx_tailroom,
-				     int max_link_payload, const char *devname);
-
-void nfc_shdlc_free(struct nfc_shdlc *shdlc);
-
-void nfc_shdlc_set_clientdata(struct nfc_shdlc *shdlc, void *clientdata);
-void *nfc_shdlc_get_clientdata(struct nfc_shdlc *shdlc);
-struct nfc_hci_dev *nfc_shdlc_get_hci_dev(struct nfc_shdlc *shdlc);
-
-#endif /* __NFC_SHDLC_H */
diff --git a/net/nfc/hci/Makefile b/net/nfc/hci/Makefile
index c4d65479629b..c5dbb6891b24 100644
--- a/net/nfc/hci/Makefile
+++ b/net/nfc/hci/Makefile
@@ -5,4 +5,4 @@
 obj-$(CONFIG_NFC_HCI) += hci.o
 
 hci-y			:= core.o hcp.o command.o llc.o llc_nop.o
-hci-$(CONFIG_NFC_SHDLC) += shdlc.o llc_shdlc.o
+hci-$(CONFIG_NFC_SHDLC) += llc_shdlc.o
diff --git a/net/nfc/hci/core.c b/net/nfc/hci/core.c
index 069e2d6056e5..c1129c22d835 100644
--- a/net/nfc/hci/core.c
+++ b/net/nfc/hci/core.c
@@ -78,7 +78,7 @@ next_msg:
 
 	pr_debug("msg_tx_queue has a cmd to send\n");
 	while ((skb = skb_dequeue(&msg->msg_frags)) != NULL) {
-		r = hdev->ops->xmit(hdev, skb);
+		r = nfc_llc_xmit_from_hci(hdev->llc, skb);
 		if (r < 0) {
 			kfree_skb(skb);
 			skb_queue_purge(&msg->msg_frags);
@@ -469,29 +469,38 @@ static int hci_dev_up(struct nfc_dev *nfc_dev)
 			return r;
 	}
 
+	r = nfc_llc_start(hdev->llc);
+	if (r < 0)
+		goto exit_close;
+
 	r = hci_dev_session_init(hdev);
 	if (r < 0)
-		goto exit;
+		goto exit_llc;
 
 	r = nfc_hci_send_event(hdev, NFC_HCI_RF_READER_A_GATE,
 			       NFC_HCI_EVT_END_OPERATION, NULL, 0);
 	if (r < 0)
-		goto exit;
+		goto exit_llc;
 
 	if (hdev->ops->hci_ready) {
 		r = hdev->ops->hci_ready(hdev);
 		if (r < 0)
-			goto exit;
+			goto exit_llc;
 	}
 
 	r = hci_dev_version(hdev);
 	if (r < 0)
-		goto exit;
+		goto exit_llc;
+
+	return 0;
+
+exit_llc:
+	nfc_llc_stop(hdev->llc);
+
+exit_close:
+	if (hdev->ops->close)
+		hdev->ops->close(hdev);
 
-exit:
-	if (r < 0)
-		if (hdev->ops->close)
-			hdev->ops->close(hdev);
 	return r;
 }
 
@@ -499,6 +508,8 @@ static int hci_dev_down(struct nfc_dev *nfc_dev)
 {
 	struct nfc_hci_dev *hdev = nfc_get_drvdata(nfc_dev);
 
+	nfc_llc_stop(hdev->llc);
+
 	if (hdev->ops->close)
 		hdev->ops->close(hdev);
 
@@ -620,6 +631,93 @@ static int hci_check_presence(struct nfc_dev *nfc_dev,
 	return 0;
 }
 
+static void nfc_hci_failure(struct nfc_hci_dev *hdev, int err)
+{
+	mutex_lock(&hdev->msg_tx_mutex);
+
+	if (hdev->cmd_pending_msg == NULL) {
+		nfc_driver_failure(hdev->ndev, err);
+		goto exit;
+	}
+
+	__nfc_hci_cmd_completion(hdev, err, NULL);
+
+exit:
+	mutex_unlock(&hdev->msg_tx_mutex);
+}
+
+static void nfc_hci_llc_failure(struct nfc_hci_dev *hdev, int err)
+{
+	nfc_hci_failure(hdev, err);
+}
+
+static void nfc_hci_recv_from_llc(struct nfc_hci_dev *hdev, struct sk_buff *skb)
+{
+	struct hcp_packet *packet;
+	u8 type;
+	u8 instruction;
+	struct sk_buff *hcp_skb;
+	u8 pipe;
+	struct sk_buff *frag_skb;
+	int msg_len;
+
+	packet = (struct hcp_packet *)skb->data;
+	if ((packet->header & ~NFC_HCI_FRAGMENT) == 0) {
+		skb_queue_tail(&hdev->rx_hcp_frags, skb);
+		return;
+	}
+
+	/* it's the last fragment. Does it need re-aggregation? */
+	if (skb_queue_len(&hdev->rx_hcp_frags)) {
+		pipe = packet->header & NFC_HCI_FRAGMENT;
+		skb_queue_tail(&hdev->rx_hcp_frags, skb);
+
+		msg_len = 0;
+		skb_queue_walk(&hdev->rx_hcp_frags, frag_skb) {
+			msg_len += (frag_skb->len -
+				    NFC_HCI_HCP_PACKET_HEADER_LEN);
+		}
+
+		hcp_skb = nfc_alloc_recv_skb(NFC_HCI_HCP_PACKET_HEADER_LEN +
+					     msg_len, GFP_KERNEL);
+		if (hcp_skb == NULL) {
+			nfc_hci_failure(hdev, -ENOMEM);
+			return;
+		}
+
+		*skb_put(hcp_skb, NFC_HCI_HCP_PACKET_HEADER_LEN) = pipe;
+
+		skb_queue_walk(&hdev->rx_hcp_frags, frag_skb) {
+			msg_len = frag_skb->len - NFC_HCI_HCP_PACKET_HEADER_LEN;
+			memcpy(skb_put(hcp_skb, msg_len),
+			       frag_skb->data + NFC_HCI_HCP_PACKET_HEADER_LEN,
+			       msg_len);
+		}
+
+		skb_queue_purge(&hdev->rx_hcp_frags);
+	} else {
+		packet->header &= NFC_HCI_FRAGMENT;
+		hcp_skb = skb;
+	}
+
+	/* if this is a response, dispatch immediately to
+	 * unblock waiting cmd context. Otherwise, enqueue to dispatch
+	 * in separate context where handler can also execute command.
+	 */
+	packet = (struct hcp_packet *)hcp_skb->data;
+	type = HCP_MSG_GET_TYPE(packet->message.header);
+	if (type == NFC_HCI_HCP_RESPONSE) {
+		pipe = packet->header;
+		instruction = HCP_MSG_GET_CMD(packet->message.header);
+		skb_pull(hcp_skb, NFC_HCI_HCP_PACKET_HEADER_LEN +
+			 NFC_HCI_HCP_MESSAGE_HEADER_LEN);
+		nfc_hci_hcp_message_rx(hdev, pipe, type, instruction, hcp_skb);
+	} else {
+		skb_queue_tail(&hdev->msg_rx_queue, hcp_skb);
+		queue_work(system_nrt_wq, &hdev->msg_rx_work);
+	}
+}
+
 static struct nfc_ops hci_nfc_ops = {
 	.dev_up = hci_dev_up,
 	.dev_down = hci_dev_down,
@@ -634,6 +732,7 @@ static struct nfc_ops hci_nfc_ops = {
 struct nfc_hci_dev *nfc_hci_allocate_device(struct nfc_hci_ops *ops,
 					    struct nfc_hci_init_data *init_data,
 					    u32 protocols,
+					    const char *llc_name,
 					    int tx_headroom,
 					    int tx_tailroom,
 					    int max_link_payload)
@@ -650,10 +749,19 @@ struct nfc_hci_dev *nfc_hci_allocate_device(struct nfc_hci_ops *ops,
 	if (hdev == NULL)
 		return NULL;
 
+	hdev->llc = nfc_llc_allocate(llc_name, hdev, ops->xmit,
+				     nfc_hci_recv_from_llc, tx_headroom,
+				     tx_tailroom, nfc_hci_llc_failure);
+	if (hdev->llc == NULL) {
+		kfree(hdev);
+		return NULL;
+	}
+
 	hdev->ndev = nfc_allocate_device(&hci_nfc_ops, protocols,
 					 tx_headroom + HCI_CMDS_HEADROOM,
 					 tx_tailroom);
 	if (!hdev->ndev) {
+		nfc_llc_free(hdev->llc);
 		kfree(hdev);
 		return NULL;
 	}
@@ -673,6 +781,7 @@ EXPORT_SYMBOL(nfc_hci_allocate_device);
 void nfc_hci_free_device(struct nfc_hci_dev *hdev)
 {
 	nfc_free_device(hdev->ndev);
+	nfc_llc_free(hdev->llc);
 	kfree(hdev);
 }
 EXPORT_SYMBOL(nfc_hci_free_device);
@@ -733,92 +842,15 @@ void *nfc_hci_get_clientdata(struct nfc_hci_dev *hdev)
 }
 EXPORT_SYMBOL(nfc_hci_get_clientdata);
 
-static void nfc_hci_failure(struct nfc_hci_dev *hdev, int err)
-{
-	mutex_lock(&hdev->msg_tx_mutex);
-
-	if (hdev->cmd_pending_msg == NULL) {
-		nfc_driver_failure(hdev->ndev, err);
-		goto exit;
-	}
-
-	__nfc_hci_cmd_completion(hdev, err, NULL);
-
-exit:
-	mutex_unlock(&hdev->msg_tx_mutex);
-}
-
 void nfc_hci_driver_failure(struct nfc_hci_dev *hdev, int err)
 {
 	nfc_hci_failure(hdev, err);
 }
 EXPORT_SYMBOL(nfc_hci_driver_failure);
 
-void nfc_hci_recv_frame(struct nfc_hci_dev *hdev, struct sk_buff *skb)
+void inline nfc_hci_recv_frame(struct nfc_hci_dev *hdev, struct sk_buff *skb)
 {
-	struct hcp_packet *packet;
-	u8 type;
-	u8 instruction;
-	struct sk_buff *hcp_skb;
-	u8 pipe;
-	struct sk_buff *frag_skb;
-	int msg_len;
-
-	packet = (struct hcp_packet *)skb->data;
-	if ((packet->header & ~NFC_HCI_FRAGMENT) == 0) {
-		skb_queue_tail(&hdev->rx_hcp_frags, skb);
-		return;
-	}
-
-	/* it's the last fragment. Does it need re-aggregation? */
-	if (skb_queue_len(&hdev->rx_hcp_frags)) {
-		pipe = packet->header & NFC_HCI_FRAGMENT;
-		skb_queue_tail(&hdev->rx_hcp_frags, skb);
-
-		msg_len = 0;
-		skb_queue_walk(&hdev->rx_hcp_frags, frag_skb) {
-			msg_len += (frag_skb->len -
-				    NFC_HCI_HCP_PACKET_HEADER_LEN);
-		}
-
-		hcp_skb = nfc_alloc_recv_skb(NFC_HCI_HCP_PACKET_HEADER_LEN +
-					     msg_len, GFP_KERNEL);
-		if (hcp_skb == NULL) {
-			nfc_hci_failure(hdev, -ENOMEM);
-			return;
-		}
-
-		*skb_put(hcp_skb, NFC_HCI_HCP_PACKET_HEADER_LEN) = pipe;
-
-		skb_queue_walk(&hdev->rx_hcp_frags, frag_skb) {
-			msg_len = frag_skb->len - NFC_HCI_HCP_PACKET_HEADER_LEN;
-			memcpy(skb_put(hcp_skb, msg_len),
-			       frag_skb->data + NFC_HCI_HCP_PACKET_HEADER_LEN,
-			       msg_len);
-		}
-
-		skb_queue_purge(&hdev->rx_hcp_frags);
-	} else {
-		packet->header &= NFC_HCI_FRAGMENT;
-		hcp_skb = skb;
-	}
-
-	/* if this is a response, dispatch immediately to
-	 * unblock waiting cmd context. Otherwise, enqueue to dispatch
-	 * in separate context where handler can also execute command.
-	 */
-	packet = (struct hcp_packet *)hcp_skb->data;
-	type = HCP_MSG_GET_TYPE(packet->message.header);
-	if (type == NFC_HCI_HCP_RESPONSE) {
-		pipe = packet->header;
-		instruction = HCP_MSG_GET_CMD(packet->message.header);
-		skb_pull(hcp_skb, NFC_HCI_HCP_PACKET_HEADER_LEN +
-			 NFC_HCI_HCP_MESSAGE_HEADER_LEN);
-		nfc_hci_hcp_message_rx(hdev, pipe, type, instruction, hcp_skb);
-	} else {
-		skb_queue_tail(&hdev->msg_rx_queue, hcp_skb);
-		queue_work(system_nrt_wq, &hdev->msg_rx_work);
-	}
+	nfc_llc_rcv_from_drv(hdev->llc, skb);
 }
 EXPORT_SYMBOL(nfc_hci_recv_frame);
 
@@ -832,7 +864,7 @@ static void __exit nfc_hci_exit(void)
 	nfc_llc_exit();
 }
 
-module_init(nfc_hci_init);
+subsys_initcall(nfc_hci_init);
 module_exit(nfc_hci_exit);
 
 MODULE_LICENSE("GPL");
diff --git a/net/nfc/hci/llc_shdlc.c b/net/nfc/hci/llc_shdlc.c
index bb191100ee96..fad6cd18d613 100644
--- a/net/nfc/hci/llc_shdlc.c
+++ b/net/nfc/hci/llc_shdlc.c
@@ -535,7 +535,7 @@ static void llc_shdlc_handle_send_queue(struct llc_shdlc *shdlc)
 
 		pr_debug("Sending I-Frame %d, waiting to rcv %d\n", shdlc->ns,
 			 shdlc->nr);
-	/*	SHDLC_DUMP_SKB("shdlc frame written", skb); */
+		SHDLC_DUMP_SKB("shdlc frame written", skb);
 
 		r = shdlc->xmit_to_drv(shdlc->hdev, skb);
 		if (r < 0) {
diff --git a/net/nfc/hci/shdlc.c b/net/nfc/hci/shdlc.c
deleted file mode 100644
index c63af7d3e859..000000000000
--- a/net/nfc/hci/shdlc.c
+++ /dev/null
@@ -1,918 +0,0 @@
-/*
- * Copyright (C) 2012  Intel Corporation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the
- * Free Software Foundation, Inc.,
- * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
- */
-
-#define pr_fmt(fmt) "shdlc: %s: " fmt, __func__
-
-#include <linux/sched.h>
-#include <linux/export.h>
-#include <linux/wait.h>
-#include <linux/slab.h>
-#include <linux/skbuff.h>
-
-#include <net/nfc/hci.h>
-#include <net/nfc/shdlc.h>
-
-#define SHDLC_LLC_HEAD_ROOM	2
-
-#define SHDLC_MAX_WINDOW	4
-#define SHDLC_SREJ_SUPPORT	false
-
-#define SHDLC_CONTROL_HEAD_MASK	0xe0
-#define SHDLC_CONTROL_HEAD_I	0x80
-#define SHDLC_CONTROL_HEAD_I2	0xa0
-#define SHDLC_CONTROL_HEAD_S	0xc0
-#define SHDLC_CONTROL_HEAD_U	0xe0
-
-#define SHDLC_CONTROL_NS_MASK	0x38
-#define SHDLC_CONTROL_NR_MASK	0x07
-#define SHDLC_CONTROL_TYPE_MASK	0x18
-
-#define SHDLC_CONTROL_M_MASK	0x1f
-
-enum sframe_type {
-	S_FRAME_RR = 0x00,
-	S_FRAME_REJ = 0x01,
-	S_FRAME_RNR = 0x02,
-	S_FRAME_SREJ = 0x03
-};
-
-enum uframe_modifier {
-	U_FRAME_UA = 0x06,
-	U_FRAME_RSET = 0x19
-};
-
-#define SHDLC_CONNECT_VALUE_MS	5
-#define SHDLC_T1_VALUE_MS(w)	((5 * w) / 4)
-#define SHDLC_T2_VALUE_MS	300
-
-#define SHDLC_DUMP_SKB(info, skb)				  \
-do {								  \
-	pr_debug("%s:\n", info);				  \
-	print_hex_dump(KERN_DEBUG, "shdlc: ", DUMP_PREFIX_OFFSET, \
-		       16, 1, skb->data, skb->len, 0);		  \
-} while (0)
-
-/* checks x < y <= z modulo 8 */
-static bool nfc_shdlc_x_lt_y_lteq_z(int x, int y, int z)
-{
-	if (x < z)
-		return ((x < y) && (y <= z)) ? true : false;
-	else
-		return ((y > x) || (y <= z)) ? true : false;
-}
-
-/* checks x <= y < z modulo 8 */
-static bool nfc_shdlc_x_lteq_y_lt_z(int x, int y, int z)
-{
-	if (x <= z)
-		return ((x <= y) && (y < z)) ? true : false;
-	else			/* x > z -> z+8 > x */
-		return ((y >= x) || (y < z)) ? true : false;
-}
-
-static struct sk_buff *nfc_shdlc_alloc_skb(struct nfc_shdlc *shdlc,
-					   int payload_len)
-{
-	struct sk_buff *skb;
-
-	skb = alloc_skb(shdlc->client_headroom + SHDLC_LLC_HEAD_ROOM +
-			shdlc->client_tailroom + payload_len, GFP_KERNEL);
-	if (skb)
-		skb_reserve(skb, shdlc->client_headroom + SHDLC_LLC_HEAD_ROOM);
-
-	return skb;
-}
-
-/* immediately sends an S frame. */
-static int nfc_shdlc_send_s_frame(struct nfc_shdlc *shdlc,
-				  enum sframe_type sframe_type, int nr)
-{
-	int r;
-	struct sk_buff *skb;
-
-	pr_debug("sframe_type=%d nr=%d\n", sframe_type, nr);
-
-	skb = nfc_shdlc_alloc_skb(shdlc, 0);
-	if (skb == NULL)
-		return -ENOMEM;
-
-	*skb_push(skb, 1) = SHDLC_CONTROL_HEAD_S | (sframe_type << 3) | nr;
-
-	r = shdlc->ops->xmit(shdlc, skb);
-
-	kfree_skb(skb);
-
-	return r;
-}
-
-/* immediately sends an U frame. skb may contain optional payload */
-static int nfc_shdlc_send_u_frame(struct nfc_shdlc *shdlc,
-				  struct sk_buff *skb,
-				  enum uframe_modifier uframe_modifier)
-{
-	int r;
-
-	pr_debug("uframe_modifier=%d\n", uframe_modifier);
-
-	*skb_push(skb, 1) = SHDLC_CONTROL_HEAD_U | uframe_modifier;
-
-	r = shdlc->ops->xmit(shdlc, skb);
-
-	kfree_skb(skb);
-
-	return r;
-}
-
-/*
- * Free ack_pending frames until y_nr - 1, and reset t2 according to
- * the remaining oldest ack_pending frame sent time
- */
-static void nfc_shdlc_reset_t2(struct nfc_shdlc *shdlc, int y_nr)
-{
-	struct sk_buff *skb;
-	int dnr = shdlc->dnr;	/* MUST initially be < y_nr */
-
-	pr_debug("release ack pending up to frame %d excluded\n", y_nr);
-
-	while (dnr != y_nr) {
-		pr_debug("release ack pending frame %d\n", dnr);
-
-		skb = skb_dequeue(&shdlc->ack_pending_q);
-		kfree_skb(skb);
-
-		dnr = (dnr + 1) % 8;
-	}
-
-	if (skb_queue_empty(&shdlc->ack_pending_q)) {
-		if (shdlc->t2_active) {
-			del_timer_sync(&shdlc->t2_timer);
-			shdlc->t2_active = false;
-
-			pr_debug
-			    ("All sent frames acked. Stopped T2(retransmit)\n");
-		}
-	} else {
-		skb = skb_peek(&shdlc->ack_pending_q);
-
-		mod_timer(&shdlc->t2_timer, *(unsigned long *)skb->cb +
-			  msecs_to_jiffies(SHDLC_T2_VALUE_MS));
-		shdlc->t2_active = true;
-
-		pr_debug
-		    ("Start T2(retransmit) for remaining unacked sent frames\n");
-	}
-}
-
-/*
- * Receive validated frames from lower layer. skb contains HCI payload only.
- * Handle according to algorithm at spec:10.8.2
- */
-static void nfc_shdlc_rcv_i_frame(struct nfc_shdlc *shdlc,
-				  struct sk_buff *skb, int ns, int nr)
-{
-	int x_ns = ns;
-	int y_nr = nr;
-
-	pr_debug("recvd I-frame %d, remote waiting frame %d\n", ns, nr);
-
-	if (shdlc->state != SHDLC_CONNECTED)
-		goto exit;
-
-	if (x_ns != shdlc->nr) {
-		nfc_shdlc_send_s_frame(shdlc, S_FRAME_REJ, shdlc->nr);
-		goto exit;
-	}
-
-	if (shdlc->t1_active == false) {
-		shdlc->t1_active = true;
-		mod_timer(&shdlc->t1_timer,
-			  msecs_to_jiffies(SHDLC_T1_VALUE_MS(shdlc->w)));
-		pr_debug("(re)Start T1(send ack)\n");
-	}
-
-	if (skb->len) {
-		nfc_hci_recv_frame(shdlc->hdev, skb);
-		skb = NULL;
-	}
-
-	shdlc->nr = (shdlc->nr + 1) % 8;
-
-	if (nfc_shdlc_x_lt_y_lteq_z(shdlc->dnr, y_nr, shdlc->ns)) {
-		nfc_shdlc_reset_t2(shdlc, y_nr);
-
-		shdlc->dnr = y_nr;
-	}
-
-exit:
-	kfree_skb(skb);
-}
-
-static void nfc_shdlc_rcv_ack(struct nfc_shdlc *shdlc, int y_nr)
-{
-	pr_debug("remote acked up to frame %d excluded\n", y_nr);
-
-	if (nfc_shdlc_x_lt_y_lteq_z(shdlc->dnr, y_nr, shdlc->ns)) {
-		nfc_shdlc_reset_t2(shdlc, y_nr);
-		shdlc->dnr = y_nr;
-	}
-}
-
-static void nfc_shdlc_requeue_ack_pending(struct nfc_shdlc *shdlc)
-{
-	struct sk_buff *skb;
-
-	pr_debug("ns reset to %d\n", shdlc->dnr);
-
-	while ((skb = skb_dequeue_tail(&shdlc->ack_pending_q))) {
-		skb_pull(skb, 1);  /* remove control field */
-		skb_queue_head(&shdlc->send_q, skb);
-	}
-	shdlc->ns = shdlc->dnr;
-}
-
-static void nfc_shdlc_rcv_rej(struct nfc_shdlc *shdlc, int y_nr)
-{
-	struct sk_buff *skb;
-
-	pr_debug("remote asks retransmition from frame %d\n", y_nr);
-
-	if (nfc_shdlc_x_lteq_y_lt_z(shdlc->dnr, y_nr, shdlc->ns)) {
-		if (shdlc->t2_active) {
-			del_timer_sync(&shdlc->t2_timer);
-			shdlc->t2_active = false;
-			pr_debug("Stopped T2(retransmit)\n");
-		}
-
-		if (shdlc->dnr != y_nr) {
-			while ((shdlc->dnr = ((shdlc->dnr + 1) % 8)) != y_nr) {
-				skb = skb_dequeue(&shdlc->ack_pending_q);
-				kfree_skb(skb);
-			}
-		}
-
-		nfc_shdlc_requeue_ack_pending(shdlc);
-	}
-}
-
-/* See spec RR:10.8.3 REJ:10.8.4 */
-static void nfc_shdlc_rcv_s_frame(struct nfc_shdlc *shdlc,
-				  enum sframe_type s_frame_type, int nr)
-{
-	struct sk_buff *skb;
-
-	if (shdlc->state != SHDLC_CONNECTED)
-		return;
-
-	switch (s_frame_type) {
-	case S_FRAME_RR:
-		nfc_shdlc_rcv_ack(shdlc, nr);
-		if (shdlc->rnr == true) {	/* see SHDLC 10.7.7 */
-			shdlc->rnr = false;
-			if (shdlc->send_q.qlen == 0) {
-				skb = nfc_shdlc_alloc_skb(shdlc, 0);
-				if (skb)
-					skb_queue_tail(&shdlc->send_q, skb);
-			}
-		}
-		break;
-	case S_FRAME_REJ:
-		nfc_shdlc_rcv_rej(shdlc, nr);
-		break;
-	case S_FRAME_RNR:
-		nfc_shdlc_rcv_ack(shdlc, nr);
-		shdlc->rnr = true;
-		break;
-	default:
-		break;
-	}
-}
-
-static void nfc_shdlc_connect_complete(struct nfc_shdlc *shdlc, int r)
-{
-	pr_debug("result=%d\n", r);
-
-	del_timer_sync(&shdlc->connect_timer);
-
-	if (r == 0) {
-		shdlc->ns = 0;
-		shdlc->nr = 0;
-		shdlc->dnr = 0;
-
-		shdlc->state = SHDLC_CONNECTED;
-	} else {
-		shdlc->state = SHDLC_DISCONNECTED;
-	}
-
-	shdlc->connect_result = r;
-
-	wake_up(shdlc->connect_wq);
-}
-
-static int nfc_shdlc_connect_initiate(struct nfc_shdlc *shdlc)
-{
-	struct sk_buff *skb;
-
-	pr_debug("\n");
-
-	skb = nfc_shdlc_alloc_skb(shdlc, 2);
-	if (skb == NULL)
-		return -ENOMEM;
-
-	*skb_put(skb, 1) = SHDLC_MAX_WINDOW;
-	*skb_put(skb, 1) = SHDLC_SREJ_SUPPORT ? 1 : 0;
-
-	return nfc_shdlc_send_u_frame(shdlc, skb, U_FRAME_RSET);
-}
-
-static int nfc_shdlc_connect_send_ua(struct nfc_shdlc *shdlc)
-{
-	struct sk_buff *skb;
-
-	pr_debug("\n");
-
-	skb = nfc_shdlc_alloc_skb(shdlc, 0);
-	if (skb == NULL)
-		return -ENOMEM;
-
-	return nfc_shdlc_send_u_frame(shdlc, skb, U_FRAME_UA);
-}
-
-static void nfc_shdlc_rcv_u_frame(struct nfc_shdlc *shdlc,
-				  struct sk_buff *skb,
-				  enum uframe_modifier u_frame_modifier)
-{
-	u8 w = SHDLC_MAX_WINDOW;
-	bool srej_support = SHDLC_SREJ_SUPPORT;
-	int r;
-
-	pr_debug("u_frame_modifier=%d\n", u_frame_modifier);
-
-	switch (u_frame_modifier) {
-	case U_FRAME_RSET:
-		if (shdlc->state == SHDLC_NEGOCIATING) {
-			/* we sent RSET, but chip wants to negociate */
-			if (skb->len > 0)
-				w = skb->data[0];
-
-			if (skb->len > 1)
-				srej_support = skb->data[1] & 0x01 ? true :
-					       false;
-
-			if ((w <= SHDLC_MAX_WINDOW) &&
-			    (SHDLC_SREJ_SUPPORT || (srej_support == false))) {
-				shdlc->w = w;
-				shdlc->srej_support = srej_support;
-				r = nfc_shdlc_connect_send_ua(shdlc);
-				nfc_shdlc_connect_complete(shdlc, r);
-			}
-		} else if (shdlc->state == SHDLC_CONNECTED) {
-			/*
-			 * Chip wants to reset link. This is unexpected and
-			 * unsupported.
-			 */
-			shdlc->hard_fault = -ECONNRESET;
-		}
-		break;
-	case U_FRAME_UA:
-		if ((shdlc->state == SHDLC_CONNECTING &&
-		     shdlc->connect_tries > 0) ||
-		    (shdlc->state == SHDLC_NEGOCIATING))
-			nfc_shdlc_connect_complete(shdlc, 0);
-		break;
-	default:
-		break;
-	}
-
-	kfree_skb(skb);
-}
-
-static void nfc_shdlc_handle_rcv_queue(struct nfc_shdlc *shdlc)
-{
-	struct sk_buff *skb;
-	u8 control;
-	int nr;
-	int ns;
-	enum sframe_type s_frame_type;
-	enum uframe_modifier u_frame_modifier;
-
-	if (shdlc->rcv_q.qlen)
-		pr_debug("rcvQlen=%d\n", shdlc->rcv_q.qlen);
-
-	while ((skb = skb_dequeue(&shdlc->rcv_q)) != NULL) {
-		control = skb->data[0];
-		skb_pull(skb, 1);
-		switch (control & SHDLC_CONTROL_HEAD_MASK) {
-		case SHDLC_CONTROL_HEAD_I:
-		case SHDLC_CONTROL_HEAD_I2:
-			ns = (control & SHDLC_CONTROL_NS_MASK) >> 3;
-			nr = control & SHDLC_CONTROL_NR_MASK;
-			nfc_shdlc_rcv_i_frame(shdlc, skb, ns, nr);
-			break;
-		case SHDLC_CONTROL_HEAD_S:
-			s_frame_type = (control & SHDLC_CONTROL_TYPE_MASK) >> 3;
-			nr = control & SHDLC_CONTROL_NR_MASK;
-			nfc_shdlc_rcv_s_frame(shdlc, s_frame_type, nr);
-			kfree_skb(skb);
-			break;
-		case SHDLC_CONTROL_HEAD_U:
-			u_frame_modifier = control & SHDLC_CONTROL_M_MASK;
-			nfc_shdlc_rcv_u_frame(shdlc, skb, u_frame_modifier);
-			break;
-		default:
-			pr_err("UNKNOWN Control=%d\n", control);
-			kfree_skb(skb);
-			break;
-		}
-	}
-}
-
-static int nfc_shdlc_w_used(int ns, int dnr)
-{
-	int unack_count;
-
-	if (dnr <= ns)
-		unack_count = ns - dnr;
-	else
-		unack_count = 8 - dnr + ns;
-
-	return unack_count;
-}
-
-/* Send frames according to algorithm at spec:10.8.1 */
-static void nfc_shdlc_handle_send_queue(struct nfc_shdlc *shdlc)
-{
-	struct sk_buff *skb;
-	int r;
-	unsigned long time_sent;
-
-	if (shdlc->send_q.qlen)
-		pr_debug
-		    ("sendQlen=%d ns=%d dnr=%d rnr=%s w_room=%d unackQlen=%d\n",
-		     shdlc->send_q.qlen, shdlc->ns, shdlc->dnr,
-		     shdlc->rnr == false ? "false" : "true",
-		     shdlc->w - nfc_shdlc_w_used(shdlc->ns, shdlc->dnr),
-		     shdlc->ack_pending_q.qlen);
-
-	while (shdlc->send_q.qlen && shdlc->ack_pending_q.qlen < shdlc->w &&
-	       (shdlc->rnr == false)) {
-
-		if (shdlc->t1_active) {
-			del_timer_sync(&shdlc->t1_timer);
-			shdlc->t1_active = false;
-			pr_debug("Stopped T1(send ack)\n");
-		}
-
-		skb = skb_dequeue(&shdlc->send_q);
-
-		*skb_push(skb, 1) = SHDLC_CONTROL_HEAD_I | (shdlc->ns << 3) |
-				    shdlc->nr;
-
-		pr_debug("Sending I-Frame %d, waiting to rcv %d\n", shdlc->ns,
-			 shdlc->nr);
-	/*	SHDLC_DUMP_SKB("shdlc frame written", skb); */
-
-		r = shdlc->ops->xmit(shdlc, skb);
-		if (r < 0) {
-			shdlc->hard_fault = r;
-			break;
-		}
-
-		shdlc->ns = (shdlc->ns + 1) % 8;
-
-		time_sent = jiffies;
-		*(unsigned long *)skb->cb = time_sent;
-
-		skb_queue_tail(&shdlc->ack_pending_q, skb);
-
-		if (shdlc->t2_active == false) {
-			shdlc->t2_active = true;
-			mod_timer(&shdlc->t2_timer, time_sent +
-				  msecs_to_jiffies(SHDLC_T2_VALUE_MS));
-			pr_debug("Started T2 (retransmit)\n");
-		}
-	}
-}
-
-static void nfc_shdlc_connect_timeout(unsigned long data)
-{
-	struct nfc_shdlc *shdlc = (struct nfc_shdlc *)data;
-
-	pr_debug("\n");
-
-	queue_work(system_nrt_wq, &shdlc->sm_work);
-}
-
-static void nfc_shdlc_t1_timeout(unsigned long data)
-{
-	struct nfc_shdlc *shdlc = (struct nfc_shdlc *)data;
-
-	pr_debug("SoftIRQ: need to send ack\n");
-
-	queue_work(system_nrt_wq, &shdlc->sm_work);
-}
-
-static void nfc_shdlc_t2_timeout(unsigned long data)
-{
-	struct nfc_shdlc *shdlc = (struct nfc_shdlc *)data;
-
-	pr_debug("SoftIRQ: need to retransmit\n");
-
-	queue_work(system_nrt_wq, &shdlc->sm_work);
-}
-
-static void nfc_shdlc_sm_work(struct work_struct *work)
-{
-	struct nfc_shdlc *shdlc = container_of(work, struct nfc_shdlc, sm_work);
-	int r;
-
-	pr_debug("\n");
-
-	mutex_lock(&shdlc->state_mutex);
-
-	switch (shdlc->state) {
-	case SHDLC_DISCONNECTED:
-		skb_queue_purge(&shdlc->rcv_q);
-		skb_queue_purge(&shdlc->send_q);
-		skb_queue_purge(&shdlc->ack_pending_q);
-		break;
-	case SHDLC_CONNECTING:
-		if (shdlc->hard_fault) {
-			nfc_shdlc_connect_complete(shdlc, shdlc->hard_fault);
-			break;
-		}
-
-		if (shdlc->connect_tries++ < 5)
-			r = nfc_shdlc_connect_initiate(shdlc);
-		else
-			r = -ETIME;
-		if (r < 0)
-			nfc_shdlc_connect_complete(shdlc, r);
-		else {
-			mod_timer(&shdlc->connect_timer, jiffies +
-				  msecs_to_jiffies(SHDLC_CONNECT_VALUE_MS));
-
-			shdlc->state = SHDLC_NEGOCIATING;
-		}
-		break;
-	case SHDLC_NEGOCIATING:
-		if (timer_pending(&shdlc->connect_timer) == 0) {
-			shdlc->state = SHDLC_CONNECTING;
-			queue_work(system_nrt_wq, &shdlc->sm_work);
-		}
-
-		nfc_shdlc_handle_rcv_queue(shdlc);
-
-		if (shdlc->hard_fault) {
-			nfc_shdlc_connect_complete(shdlc, shdlc->hard_fault);
-			break;
-		}
-		break;
-	case SHDLC_CONNECTED:
-		nfc_shdlc_handle_rcv_queue(shdlc);
-		nfc_shdlc_handle_send_queue(shdlc);
-
-		if (shdlc->t1_active && timer_pending(&shdlc->t1_timer) == 0) {
-			pr_debug
-			    ("Handle T1(send ack) elapsed (T1 now inactive)\n");
-
-			shdlc->t1_active = false;
-			r = nfc_shdlc_send_s_frame(shdlc, S_FRAME_RR,
-						   shdlc->nr);
-			if (r < 0)
-				shdlc->hard_fault = r;
-		}
-
-		if (shdlc->t2_active && timer_pending(&shdlc->t2_timer) == 0) {
-			pr_debug
-			    ("Handle T2(retransmit) elapsed (T2 inactive)\n");
-
-			shdlc->t2_active = false;
-
-			nfc_shdlc_requeue_ack_pending(shdlc);
-			nfc_shdlc_handle_send_queue(shdlc);
-		}
-
-		if (shdlc->hard_fault) {
-			nfc_hci_driver_failure(shdlc->hdev, shdlc->hard_fault);
-		}
-		break;
-	default:
-		break;
-	}
-	mutex_unlock(&shdlc->state_mutex);
-}
-
-/*
- * Called from syscall context to establish shdlc link. Sleeps until
- * link is ready or failure.
- */
-static int nfc_shdlc_connect(struct nfc_shdlc *shdlc)
-{
-	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(connect_wq);
-
-	pr_debug("\n");
-
-	mutex_lock(&shdlc->state_mutex);
-
-	shdlc->state = SHDLC_CONNECTING;
-	shdlc->connect_wq = &connect_wq;
-	shdlc->connect_tries = 0;
-	shdlc->connect_result = 1;
-
-	mutex_unlock(&shdlc->state_mutex);
-
-	queue_work(system_nrt_wq, &shdlc->sm_work);
-
-	wait_event(connect_wq, shdlc->connect_result != 1);
-
-	return shdlc->connect_result;
-}
-
-static void nfc_shdlc_disconnect(struct nfc_shdlc *shdlc)
-{
-	pr_debug("\n");
-
-	mutex_lock(&shdlc->state_mutex);
-
-	shdlc->state = SHDLC_DISCONNECTED;
-
-	mutex_unlock(&shdlc->state_mutex);
-
-	queue_work(system_nrt_wq, &shdlc->sm_work);
-}
-
-/*
- * Receive an incoming shdlc frame. Frame has already been crc-validated.
- * skb contains only LLC header and payload.
- * If skb == NULL, it is a notification that the link below is dead.
- */
-void nfc_shdlc_recv_frame(struct nfc_shdlc *shdlc, struct sk_buff *skb)
-{
-	if (skb == NULL) {
-		pr_err("NULL Frame -> link is dead\n");
-		shdlc->hard_fault = -EREMOTEIO;
-	} else {
-		SHDLC_DUMP_SKB("incoming frame", skb);
-		skb_queue_tail(&shdlc->rcv_q, skb);
-	}
-
-	queue_work(system_nrt_wq, &shdlc->sm_work);
-}
-EXPORT_SYMBOL(nfc_shdlc_recv_frame);
-
-static int nfc_shdlc_open(struct nfc_hci_dev *hdev)
-{
-	struct nfc_shdlc *shdlc = nfc_hci_get_clientdata(hdev);
-	int r;
-
-	pr_debug("\n");
-
-	if (shdlc->ops->open) {
-		r = shdlc->ops->open(shdlc);
-		if (r < 0)
-			return r;
-	}
-
-	r = nfc_shdlc_connect(shdlc);
-	if (r < 0 && shdlc->ops->close)
-		shdlc->ops->close(shdlc);
-
-	return r;
-}
-
-static void nfc_shdlc_close(struct nfc_hci_dev *hdev)
-{
-	struct nfc_shdlc *shdlc = nfc_hci_get_clientdata(hdev);
-
-	pr_debug("\n");
-
-	nfc_shdlc_disconnect(shdlc);
-
-	if (shdlc->ops->close)
-		shdlc->ops->close(shdlc);
-}
-
-static int nfc_shdlc_hci_ready(struct nfc_hci_dev *hdev)
-{
-	struct nfc_shdlc *shdlc = nfc_hci_get_clientdata(hdev);
-	int r = 0;
-
-	pr_debug("\n");
-
-	if (shdlc->ops->hci_ready)
-		r = shdlc->ops->hci_ready(shdlc);
-
-	return r;
-}
-
-static int nfc_shdlc_xmit(struct nfc_hci_dev *hdev, struct sk_buff *skb)
-{
-	struct nfc_shdlc *shdlc = nfc_hci_get_clientdata(hdev);
-
-	SHDLC_DUMP_SKB("queuing HCP packet to shdlc", skb);
-
-	skb_queue_tail(&shdlc->send_q, skb);
-
-	queue_work(system_nrt_wq, &shdlc->sm_work);
-
-	return 0;
-}
-
-static int nfc_shdlc_start_poll(struct nfc_hci_dev *hdev,
-				u32 im_protocols, u32 tm_protocols)
-{
-	struct nfc_shdlc *shdlc = nfc_hci_get_clientdata(hdev);
-
-	pr_debug("\n");
-
-	if (shdlc->ops->start_poll)
-		return shdlc->ops->start_poll(shdlc,
-					      im_protocols, tm_protocols);
-
-	return 0;
-}
-
-static int nfc_shdlc_target_from_gate(struct nfc_hci_dev *hdev, u8 gate,
-				      struct nfc_target *target)
-{
-	struct nfc_shdlc *shdlc = nfc_hci_get_clientdata(hdev);
-
-	if (shdlc->ops->target_from_gate)
-		return shdlc->ops->target_from_gate(shdlc, gate, target);
-
-	return -EPERM;
-}
-
-static int nfc_shdlc_complete_target_discovered(struct nfc_hci_dev *hdev,
-						u8 gate,
-						struct nfc_target *target)
-{
-	struct nfc_shdlc *shdlc = nfc_hci_get_clientdata(hdev);
-
-	pr_debug("\n");
-
-	if (shdlc->ops->complete_target_discovered)
-		return shdlc->ops->complete_target_discovered(shdlc, gate,
-							      target);
-
-	return 0;
-}
-
-static int nfc_shdlc_data_exchange(struct nfc_hci_dev *hdev,
-				   struct nfc_target *target,
-				   struct sk_buff *skb,
-				   data_exchange_cb_t cb, void *cb_context)
-{
-	struct nfc_shdlc *shdlc = nfc_hci_get_clientdata(hdev);
-
-	if (shdlc->ops->data_exchange)
-		return shdlc->ops->data_exchange(shdlc, target, skb, cb,
-						 cb_context);
-
-	return -EPERM;
-}
-
-static int nfc_shdlc_check_presence(struct nfc_hci_dev *hdev,
-				    struct nfc_target *target)
-{
-	struct nfc_shdlc *shdlc = nfc_hci_get_clientdata(hdev);
-
-	if (shdlc->ops->check_presence)
-		return shdlc->ops->check_presence(shdlc, target);
-
-	return 0;
-}
-
-static struct nfc_hci_ops shdlc_ops = {
-	.open = nfc_shdlc_open,
-	.close = nfc_shdlc_close,
-	.hci_ready = nfc_shdlc_hci_ready,
-	.xmit = nfc_shdlc_xmit,
-	.start_poll = nfc_shdlc_start_poll,
-	.target_from_gate = nfc_shdlc_target_from_gate,
-	.complete_target_discovered = nfc_shdlc_complete_target_discovered,
-	.data_exchange = nfc_shdlc_data_exchange,
-	.check_presence = nfc_shdlc_check_presence,
-};
-
-struct nfc_shdlc *nfc_shdlc_allocate(struct nfc_shdlc_ops *ops,
-				     struct nfc_hci_init_data *init_data,
-				     u32 protocols,
-				     int tx_headroom, int tx_tailroom,
-				     int max_link_payload, const char *devname)
-{
-	struct nfc_shdlc *shdlc;
-	int r;
-
-	if (ops->xmit == NULL)
-		return NULL;
-
-	shdlc = kzalloc(sizeof(struct nfc_shdlc), GFP_KERNEL);
-	if (shdlc == NULL)
-		return NULL;
-
-	mutex_init(&shdlc->state_mutex);
-	shdlc->ops = ops;
-	shdlc->state = SHDLC_DISCONNECTED;
-
-	init_timer(&shdlc->connect_timer);
-	shdlc->connect_timer.data = (unsigned long)shdlc;
-	shdlc->connect_timer.function = nfc_shdlc_connect_timeout;
-
-	init_timer(&shdlc->t1_timer);
-	shdlc->t1_timer.data = (unsigned long)shdlc;
-	shdlc->t1_timer.function = nfc_shdlc_t1_timeout;
-
-	init_timer(&shdlc->t2_timer);
-	shdlc->t2_timer.data = (unsigned long)shdlc;
-	shdlc->t2_timer.function = nfc_shdlc_t2_timeout;
-
-	shdlc->w = SHDLC_MAX_WINDOW;
-	shdlc->srej_support = SHDLC_SREJ_SUPPORT;
-
-	skb_queue_head_init(&shdlc->rcv_q);
-	skb_queue_head_init(&shdlc->send_q);
-	skb_queue_head_init(&shdlc->ack_pending_q);
-
-	INIT_WORK(&shdlc->sm_work, nfc_shdlc_sm_work);
-
-	shdlc->client_headroom = tx_headroom;
-	shdlc->client_tailroom = tx_tailroom;
-
-	shdlc->hdev = nfc_hci_allocate_device(&shdlc_ops, init_data, protocols,
-					      tx_headroom + SHDLC_LLC_HEAD_ROOM,
-					      tx_tailroom,
-					      max_link_payload);
-	if (shdlc->hdev == NULL)
-		goto err_allocdev;
-
-	nfc_hci_set_clientdata(shdlc->hdev, shdlc);
-
-	r = nfc_hci_register_device(shdlc->hdev);
-	if (r < 0)
-		goto err_regdev;
-
-	return shdlc;
-
-err_regdev:
-	nfc_hci_free_device(shdlc->hdev);
-
-err_allocdev:
-	kfree(shdlc);
-
-	return NULL;
-}
-EXPORT_SYMBOL(nfc_shdlc_allocate);
-
-void nfc_shdlc_free(struct nfc_shdlc *shdlc)
-{
-	pr_debug("\n");
-
-	nfc_hci_unregister_device(shdlc->hdev);
-	nfc_hci_free_device(shdlc->hdev);
-
-	cancel_work_sync(&shdlc->sm_work);
-
-	skb_queue_purge(&shdlc->rcv_q);
-	skb_queue_purge(&shdlc->send_q);
-	skb_queue_purge(&shdlc->ack_pending_q);
-
-	kfree(shdlc);
-}
-EXPORT_SYMBOL(nfc_shdlc_free);
-
-void nfc_shdlc_set_clientdata(struct nfc_shdlc *shdlc, void *clientdata)
-{
-	pr_debug("\n");
-
-	shdlc->clientdata = clientdata;
-}
-EXPORT_SYMBOL(nfc_shdlc_set_clientdata);
-
-void *nfc_shdlc_get_clientdata(struct nfc_shdlc *shdlc)
-{
-	return shdlc->clientdata;
-}
-EXPORT_SYMBOL(nfc_shdlc_get_clientdata);
-
-struct nfc_hci_dev *nfc_shdlc_get_hci_dev(struct nfc_shdlc *shdlc)
-{
-	return shdlc->hdev;
-}
-EXPORT_SYMBOL(nfc_shdlc_get_hci_dev);
-- 
cgit v1.2.3


From 96e324024b421b3753eb142d5d92fbe4ac5e7519 Mon Sep 17 00:00:00 2001
From: Waldemar Rymarkiewicz <waldemar.rymarkiewicz@tieto.com>
Date: Thu, 20 Sep 2012 08:59:10 +0200
Subject: NFC: xmit from hci ops must return 0 or negative

xmit callback provided by a driver encapsulates upper layers
data and sends it to the hardware. So, HCI does not know the
exact amount of data being sent and thus can't handle partially
sent frames properly.

Therefore, the driver must return 0 for completely sent frame or
negative for failure.

Signed-off-by: Waldemar Rymarkiewicz <waldemar.rymarkiewicz@tieto.com>
Acked-by: Eric Lapuyade <eric.lapuyade@intel.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/nfc/pn544_hci.c | 8 ++++++--
 include/net/nfc/hci.h   | 5 +++++
 2 files changed, 11 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/drivers/nfc/pn544_hci.c b/drivers/nfc/pn544_hci.c
index 7b0c2176e89b..c9c8570273ab 100644
--- a/drivers/nfc/pn544_hci.c
+++ b/drivers/nfc/pn544_hci.c
@@ -235,8 +235,12 @@ static int pn544_hci_i2c_write(struct i2c_client *client, u8 *buf, int len)
 		r = i2c_master_send(client, buf, len);
 	}
 
-	if (r >= 0 && r != len)
-		r = -EREMOTEIO;
+	if (r >= 0) {
+		if (r != len)
+			return -EREMOTEIO;
+		else
+			return 0;
+	}
 
 	return r;
 }
diff --git a/include/net/nfc/hci.h b/include/net/nfc/hci.h
index 9b5ed2d94d60..e900072950cb 100644
--- a/include/net/nfc/hci.h
+++ b/include/net/nfc/hci.h
@@ -30,6 +30,11 @@ struct nfc_hci_ops {
 	int (*open) (struct nfc_hci_dev *hdev);
 	void (*close) (struct nfc_hci_dev *hdev);
 	int (*hci_ready) (struct nfc_hci_dev *hdev);
+	/*
+	 * xmit must always send the complete buffer before
+	 * returning. Returned result must be 0 for success
+	 * or negative for failure.
+	 */
 	int (*xmit) (struct nfc_hci_dev *hdev, struct sk_buff *skb);
 	int (*start_poll) (struct nfc_hci_dev *hdev,
 			   u32 im_protocols, u32 tm_protocols);
-- 
cgit v1.2.3


From 4463523bef98ff827a89cf8219db7dfac4350241 Mon Sep 17 00:00:00 2001
From: Thierry Escande <thierry.escande@linux.intel.com>
Date: Wed, 26 Sep 2012 18:16:44 +0200
Subject: NFC: LLCP raw socket support

This adds support for socket of type SOCK_RAW to LLCP.
sk_buff are copied and sent to raw sockets with a 2 bytes extra header:
The first byte header contains the nfc adapter index.
The second one contains flags:
- 0x01 - Direction (0=RX, 1=TX)
- 0x02-0x80 - Reserved
A raw socket has to be explicitly bound to a nfc adapter. This is achieved
by specifying the adapter index to be bound to in the dev_idx field of the
sockaddr_nfc_llcp struct passed to bind().

Signed-off-by: Thierry Escande <thierry.escande@linux.intel.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 include/linux/nfc.h     | 11 ++++++
 net/nfc/llcp/commands.c |  2 ++
 net/nfc/llcp/llcp.c     | 46 +++++++++++++++++++++++++
 net/nfc/llcp/llcp.h     |  3 ++
 net/nfc/llcp/sock.c     | 90 ++++++++++++++++++++++++++++++++++++++++++++++---
 5 files changed, 148 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/nfc.h b/include/linux/nfc.h
index 6189f27e305b..d908d17da56d 100644
--- a/include/linux/nfc.h
+++ b/include/linux/nfc.h
@@ -183,4 +183,15 @@ struct sockaddr_nfc_llcp {
 
 #define NFC_HEADER_SIZE 1
 
+/**
+ * Pseudo-header info for raw socket packets
+ * First byte is the adapter index
+ * Second byte contains flags
+ *  - 0x01 - Direction (0=RX, 1=TX)
+ *  - 0x02-0x80 - Reserved
+ **/
+#define NFC_LLCP_RAW_HEADER_SIZE	2
+#define NFC_LLCP_DIRECTION_RX		0x00
+#define NFC_LLCP_DIRECTION_TX		0x01
+
 #endif /*__LINUX_NFC_H */
diff --git a/net/nfc/llcp/commands.c b/net/nfc/llcp/commands.c
index b982b5b890d7..c45ccd6c094c 100644
--- a/net/nfc/llcp/commands.c
+++ b/net/nfc/llcp/commands.c
@@ -312,6 +312,8 @@ int nfc_llcp_send_symm(struct nfc_dev *dev)
 
 	skb = llcp_add_header(skb, 0, 0, LLCP_PDU_SYMM);
 
+	nfc_llcp_send_to_raw_sock(local, skb, NFC_LLCP_DIRECTION_TX);
+
 	return nfc_data_exchange(dev, local->target_idx, skb,
 				 nfc_llcp_recv, local);
 }
diff --git a/net/nfc/llcp/llcp.c b/net/nfc/llcp/llcp.c
index d649fbf39d58..fc43747f0c33 100644
--- a/net/nfc/llcp/llcp.c
+++ b/net/nfc/llcp/llcp.c
@@ -558,6 +558,46 @@ static void nfc_llcp_set_nrns(struct nfc_llcp_sock *sock, struct sk_buff *pdu)
 	sock->recv_ack_n = (sock->recv_n - 1) % 16;
 }
 
+void nfc_llcp_send_to_raw_sock(struct nfc_llcp_local *local,
+			       struct sk_buff *skb, u8 direction)
+{
+	struct hlist_node *node;
+	struct sk_buff *skb_copy = NULL, *nskb;
+	struct sock *sk;
+	u8 *data;
+
+	read_lock(&local->raw_sockets.lock);
+
+	sk_for_each(sk, node, &local->raw_sockets.head) {
+		if (sk->sk_state != LLCP_BOUND)
+			continue;
+
+		if (skb_copy == NULL) {
+			skb_copy = __pskb_copy(skb, NFC_LLCP_RAW_HEADER_SIZE,
+					       GFP_ATOMIC);
+
+			if (skb_copy == NULL)
+				continue;
+
+			data = skb_push(skb_copy, NFC_LLCP_RAW_HEADER_SIZE);
+
+			data[0] = local->dev ? local->dev->idx : 0xFF;
+			data[1] = direction;
+		}
+
+		nskb = skb_clone(skb_copy, GFP_ATOMIC);
+		if (!nskb)
+			continue;
+
+		if (sock_queue_rcv_skb(sk, nskb))
+			kfree_skb(nskb);
+	}
+
+	read_unlock(&local->raw_sockets.lock);
+
+	kfree_skb(skb_copy);
+}
+
 static void nfc_llcp_tx_work(struct work_struct *work)
 {
 	struct nfc_llcp_local *local = container_of(work, struct nfc_llcp_local,
@@ -578,6 +618,9 @@ static void nfc_llcp_tx_work(struct work_struct *work)
 				       DUMP_PREFIX_OFFSET, 16, 1,
 				       skb->data, skb->len, true);
 
+			nfc_llcp_send_to_raw_sock(local, skb,
+						  NFC_LLCP_DIRECTION_TX);
+
 			ret = nfc_data_exchange(local->dev, local->target_idx,
 						skb, nfc_llcp_recv, local);
 
@@ -1022,6 +1065,8 @@ static void nfc_llcp_rx_work(struct work_struct *work)
 		print_hex_dump(KERN_DEBUG, "LLCP Rx: ", DUMP_PREFIX_OFFSET,
 			       16, 1, skb->data, skb->len, true);
 
+	nfc_llcp_send_to_raw_sock(local, skb, NFC_LLCP_DIRECTION_RX);
+
 	switch (ptype) {
 	case LLCP_PDU_SYMM:
 		pr_debug("SYMM\n");
@@ -1158,6 +1203,7 @@ int nfc_llcp_register_device(struct nfc_dev *ndev)
 
 	rwlock_init(&local->sockets.lock);
 	rwlock_init(&local->connecting_sockets.lock);
+	rwlock_init(&local->raw_sockets.lock);
 
 	nfc_llcp_build_gb(local);
 
diff --git a/net/nfc/llcp/llcp.h b/net/nfc/llcp/llcp.h
index af395c9ceb03..fdb2d24e60bd 100644
--- a/net/nfc/llcp/llcp.h
+++ b/net/nfc/llcp/llcp.h
@@ -86,6 +86,7 @@ struct nfc_llcp_local {
 	/* sockets array */
 	struct llcp_sock_list sockets;
 	struct llcp_sock_list connecting_sockets;
+	struct llcp_sock_list raw_sockets;
 };
 
 struct nfc_llcp_sock {
@@ -184,6 +185,8 @@ u8 nfc_llcp_get_sdp_ssap(struct nfc_llcp_local *local,
 u8 nfc_llcp_get_local_ssap(struct nfc_llcp_local *local);
 void nfc_llcp_put_ssap(struct nfc_llcp_local *local, u8 ssap);
 int nfc_llcp_queue_i_frames(struct nfc_llcp_sock *sock);
+void nfc_llcp_send_to_raw_sock(struct nfc_llcp_local *local,
+			       struct sk_buff *skb, u8 direction);
 
 /* Sock API */
 struct sock *nfc_llcp_sock_alloc(struct socket *sock, int type, gfp_t gfp);
diff --git a/net/nfc/llcp/sock.c b/net/nfc/llcp/sock.c
index 6e188d4020ba..40f056debf9a 100644
--- a/net/nfc/llcp/sock.c
+++ b/net/nfc/llcp/sock.c
@@ -142,6 +142,60 @@ error:
 	return ret;
 }
 
+static int llcp_raw_sock_bind(struct socket *sock, struct sockaddr *addr,
+			      int alen)
+{
+	struct sock *sk = sock->sk;
+	struct nfc_llcp_sock *llcp_sock = nfc_llcp_sock(sk);
+	struct nfc_llcp_local *local;
+	struct nfc_dev *dev;
+	struct sockaddr_nfc_llcp llcp_addr;
+	int len, ret = 0;
+
+	if (!addr || addr->sa_family != AF_NFC)
+		return -EINVAL;
+
+	pr_debug("sk %p addr %p family %d\n", sk, addr, addr->sa_family);
+
+	memset(&llcp_addr, 0, sizeof(llcp_addr));
+	len = min_t(unsigned int, sizeof(llcp_addr), alen);
+	memcpy(&llcp_addr, addr, len);
+
+	lock_sock(sk);
+
+	if (sk->sk_state != LLCP_CLOSED) {
+		ret = -EBADFD;
+		goto error;
+	}
+
+	dev = nfc_get_device(llcp_addr.dev_idx);
+	if (dev == NULL) {
+		ret = -ENODEV;
+		goto error;
+	}
+
+	local = nfc_llcp_find_local(dev);
+	if (local == NULL) {
+		ret = -ENODEV;
+		goto put_dev;
+	}
+
+	llcp_sock->dev = dev;
+	llcp_sock->local = nfc_llcp_local_get(local);
+	llcp_sock->nfc_protocol = llcp_addr.nfc_protocol;
+
+	nfc_llcp_sock_link(&local->raw_sockets, sk);
+
+	sk->sk_state = LLCP_BOUND;
+
+put_dev:
+	nfc_put_device(dev);
+
+error:
+	release_sock(sk);
+	return ret;
+}
+
 static int llcp_sock_listen(struct socket *sock, int backlog)
 {
 	struct sock *sk = sock->sk;
@@ -418,7 +472,10 @@ static int llcp_sock_release(struct socket *sock)
 
 	release_sock(sk);
 
-	nfc_llcp_sock_unlink(&local->sockets, sk);
+	if (sock->type == SOCK_RAW)
+		nfc_llcp_sock_unlink(&local->raw_sockets, sk);
+	else
+		nfc_llcp_sock_unlink(&local->sockets, sk);
 
 out:
 	sock_orphan(sk);
@@ -614,7 +671,7 @@ static int llcp_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
 	if (!(flags & MSG_PEEK)) {
 
 		/* SOCK_STREAM: re-queue skb if it contains unreceived data */
-		if (sk->sk_type == SOCK_STREAM) {
+		if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_RAW) {
 			skb_pull(skb, copied);
 			if (skb->len) {
 				skb_queue_head(&sk->sk_receive_queue, skb);
@@ -655,6 +712,26 @@ static const struct proto_ops llcp_sock_ops = {
 	.mmap           = sock_no_mmap,
 };
 
+static const struct proto_ops llcp_rawsock_ops = {
+	.family         = PF_NFC,
+	.owner          = THIS_MODULE,
+	.bind           = llcp_raw_sock_bind,
+	.connect        = sock_no_connect,
+	.release        = llcp_sock_release,
+	.socketpair     = sock_no_socketpair,
+	.accept         = sock_no_accept,
+	.getname        = llcp_sock_getname,
+	.poll           = llcp_sock_poll,
+	.ioctl          = sock_no_ioctl,
+	.listen         = sock_no_listen,
+	.shutdown       = sock_no_shutdown,
+	.setsockopt     = sock_no_setsockopt,
+	.getsockopt     = sock_no_getsockopt,
+	.sendmsg        = sock_no_sendmsg,
+	.recvmsg        = llcp_sock_recvmsg,
+	.mmap           = sock_no_mmap,
+};
+
 static void llcp_sock_destruct(struct sock *sk)
 {
 	struct nfc_llcp_sock *llcp_sock = nfc_llcp_sock(sk);
@@ -732,10 +809,15 @@ static int llcp_sock_create(struct net *net, struct socket *sock,
 
 	pr_debug("%p\n", sock);
 
-	if (sock->type != SOCK_STREAM && sock->type != SOCK_DGRAM)
+	if (sock->type != SOCK_STREAM &&
+	    sock->type != SOCK_DGRAM &&
+	    sock->type != SOCK_RAW)
 		return -ESOCKTNOSUPPORT;
 
-	sock->ops = &llcp_sock_ops;
+	if (sock->type == SOCK_RAW)
+		sock->ops = &llcp_rawsock_ops;
+	else
+		sock->ops = &llcp_sock_ops;
 
 	sk = nfc_llcp_sock_alloc(sock, sock->type, GFP_ATOMIC);
 	if (sk == NULL)
-- 
cgit v1.2.3


From eccc1bb8d4b4cf68d3c9becb083fa94ada7d495c Mon Sep 17 00:00:00 2001
From: stephen hemminger <shemminger@vyatta.com>
Date: Tue, 25 Sep 2012 11:02:48 +0000
Subject: tunnel: drop packet if ECN present with not-ECT

Linux tunnels were written before RFC6040 and therefore never
implemented the corner case of ECN getting set in the outer header
and the inner header not being ready for it.

Section 4.2.  Default Tunnel Egress Behaviour.
 o If the inner ECN field is Not-ECT, the decapsulator MUST NOT
      propagate any other ECN codepoint onwards.  This is because the
      inner Not-ECT marking is set by transports that rely on dropped
      packets as an indication of congestion and would not understand or
      respond to any other ECN codepoint [RFC4774].  Specifically:

      *  If the inner ECN field is Not-ECT and the outer ECN field is
         CE, the decapsulator MUST drop the packet.

      *  If the inner ECN field is Not-ECT and the outer ECN field is
         Not-ECT, ECT(0), or ECT(1), the decapsulator MUST forward the
         outgoing packet with the ECN field cleared to Not-ECT.

This patch moves the ECN decap logic out of the individual tunnels
into a common place.

It also adds logging to allow detecting broken systems that
set ECN bits incorrectly when tunneling (or an intermediate
router might be changing the header).

Overloads rx_frame_error to keep track of ECN related error.

Thanks to Chris Wright who caught this while reviewing the new VXLAN
tunnel.

This code was tested by injecting faulty logic in other end GRE
to send incorrectly encapsulated packets.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inet_ecn.h | 76 ++++++++++++++++++++++++++++++++++++++++++++++++++
 net/ipv4/ip_gre.c      | 38 ++++++++++++++-----------
 net/ipv4/ipip.c        | 42 +++++++++++++++++-----------
 net/ipv6/ip6_gre.c     | 54 ++++++++++++++++-------------------
 4 files changed, 147 insertions(+), 63 deletions(-)

(limited to 'include')

diff --git a/include/net/inet_ecn.h b/include/net/inet_ecn.h
index 2fa14691869c..aab73757bc4d 100644
--- a/include/net/inet_ecn.h
+++ b/include/net/inet_ecn.h
@@ -15,6 +15,8 @@ enum {
 	INET_ECN_MASK = 3,
 };
 
+extern int sysctl_tunnel_ecn_log;
+
 static inline int INET_ECN_is_ce(__u8 dsfield)
 {
 	return (dsfield & INET_ECN_MASK) == INET_ECN_CE;
@@ -145,4 +147,78 @@ static inline int INET_ECN_set_ce(struct sk_buff *skb)
 	return 0;
 }
 
+/*
+ * RFC 6080 4.2
+ *  To decapsulate the inner header at the tunnel egress, a compliant
+ *  tunnel egress MUST set the outgoing ECN field to the codepoint at the
+ *  intersection of the appropriate arriving inner header (row) and outer
+ *  header (column) in Figure 4
+ *
+ *      +---------+------------------------------------------------+
+ *      |Arriving |            Arriving Outer Header               |
+ *      |   Inner +---------+------------+------------+------------+
+ *      |  Header | Not-ECT | ECT(0)     | ECT(1)     |     CE     |
+ *      +---------+---------+------------+------------+------------+
+ *      | Not-ECT | Not-ECT |Not-ECT(!!!)|Not-ECT(!!!)| <drop>(!!!)|
+ *      |  ECT(0) |  ECT(0) | ECT(0)     | ECT(1)     |     CE     |
+ *      |  ECT(1) |  ECT(1) | ECT(1) (!) | ECT(1)     |     CE     |
+ *      |    CE   |      CE |     CE     |     CE(!!!)|     CE     |
+ *      +---------+---------+------------+------------+------------+
+ *
+ *             Figure 4: New IP in IP Decapsulation Behaviour
+ *
+ *  returns 0 on success
+ *          1 if something is broken and should be logged (!!! above)
+ *          2 if packet should be dropped
+ */
+static inline int INET_ECN_decapsulate(struct sk_buff *skb,
+				       __u8 outer, __u8 inner)
+{
+	if (INET_ECN_is_not_ect(inner)) {
+		switch (outer & INET_ECN_MASK) {
+		case INET_ECN_NOT_ECT:
+			return 0;
+		case INET_ECN_ECT_0:
+		case INET_ECN_ECT_1:
+			return 1;
+		case INET_ECN_CE:
+			return 2;
+		}
+	}
+
+	if (INET_ECN_is_ce(outer))
+		INET_ECN_set_ce(skb);
+
+	return 0;
+}
+
+static inline int IP_ECN_decapsulate(const struct iphdr *oiph,
+				     struct sk_buff *skb)
+{
+	__u8 inner;
+
+	if (skb->protocol == htons(ETH_P_IP))
+		inner = ip_hdr(skb)->tos;
+	else if (skb->protocol == htons(ETH_P_IPV6))
+		inner = ipv6_get_dsfield(ipv6_hdr(skb));
+	else
+		return 0;
+
+	return INET_ECN_decapsulate(skb, oiph->tos, inner);
+}
+
+static inline int IP6_ECN_decapsulate(const struct ipv6hdr *oipv6h,
+				      struct sk_buff *skb)
+{
+	__u8 inner;
+
+	if (skb->protocol == htons(ETH_P_IP))
+		inner = ip_hdr(skb)->tos;
+	else if (skb->protocol == htons(ETH_P_IPV6))
+		inner = ipv6_get_dsfield(ipv6_hdr(skb));
+	else
+		return 0;
+
+	return INET_ECN_decapsulate(skb, ipv6_get_dsfield(oipv6h), inner);
+}
 #endif
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 1c012cb2cb94..ef0b861ce044 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -120,6 +120,10 @@
    Alexey Kuznetsov.
  */
 
+static bool log_ecn_error = true;
+module_param(log_ecn_error, bool, 0644);
+MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
+
 static struct rtnl_link_ops ipgre_link_ops __read_mostly;
 static int ipgre_tunnel_init(struct net_device *dev);
 static void ipgre_tunnel_setup(struct net_device *dev);
@@ -204,7 +208,9 @@ static struct rtnl_link_stats64 *ipgre_get_stats64(struct net_device *dev,
 	tot->rx_crc_errors = dev->stats.rx_crc_errors;
 	tot->rx_fifo_errors = dev->stats.rx_fifo_errors;
 	tot->rx_length_errors = dev->stats.rx_length_errors;
+	tot->rx_frame_errors = dev->stats.rx_frame_errors;
 	tot->rx_errors = dev->stats.rx_errors;
+
 	tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
 	tot->tx_carrier_errors = dev->stats.tx_carrier_errors;
 	tot->tx_dropped = dev->stats.tx_dropped;
@@ -587,17 +593,6 @@ static void ipgre_err(struct sk_buff *skb, u32 info)
 	t->err_time = jiffies;
 }
 
-static inline void ipgre_ecn_decapsulate(const struct iphdr *iph, struct sk_buff *skb)
-{
-	if (INET_ECN_is_ce(iph->tos)) {
-		if (skb->protocol == htons(ETH_P_IP)) {
-			IP_ECN_set_ce(ip_hdr(skb));
-		} else if (skb->protocol == htons(ETH_P_IPV6)) {
-			IP6_ECN_set_ce(ipv6_hdr(skb));
-		}
-	}
-}
-
 static inline u8
 ipgre_ecn_encapsulate(u8 tos, const struct iphdr *old_iph, struct sk_buff *skb)
 {
@@ -620,6 +615,7 @@ static int ipgre_rcv(struct sk_buff *skb)
 	struct ip_tunnel *tunnel;
 	int    offset = 4;
 	__be16 gre_proto;
+	int    err;
 
 	if (!pskb_may_pull(skb, 16))
 		goto drop;
@@ -723,17 +719,27 @@ static int ipgre_rcv(struct sk_buff *skb)
 			skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
 		}
 
+		__skb_tunnel_rx(skb, tunnel->dev);
+
+		skb_reset_network_header(skb);
+		err = IP_ECN_decapsulate(iph, skb);
+		if (unlikely(err)) {
+			if (log_ecn_error)
+				net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
+						     &iph->saddr, iph->tos);
+			if (err > 1) {
+				++tunnel->dev->stats.rx_frame_errors;
+				++tunnel->dev->stats.rx_errors;
+				goto drop;
+			}
+		}
+
 		tstats = this_cpu_ptr(tunnel->dev->tstats);
 		u64_stats_update_begin(&tstats->syncp);
 		tstats->rx_packets++;
 		tstats->rx_bytes += skb->len;
 		u64_stats_update_end(&tstats->syncp);
 
-		__skb_tunnel_rx(skb, tunnel->dev);
-
-		skb_reset_network_header(skb);
-		ipgre_ecn_decapsulate(iph, skb);
-
 		netif_rx(skb);
 
 		return 0;
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 618bde867ac1..e15b45297c09 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -120,6 +120,10 @@
 #define HASH_SIZE  16
 #define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF)
 
+static bool log_ecn_error = true;
+module_param(log_ecn_error, bool, 0644);
+MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
+
 static int ipip_net_id __read_mostly;
 struct ipip_net {
 	struct ip_tunnel __rcu *tunnels_r_l[HASH_SIZE];
@@ -400,28 +404,18 @@ out:
 	return err;
 }
 
-static inline void ipip_ecn_decapsulate(const struct iphdr *outer_iph,
-					struct sk_buff *skb)
-{
-	struct iphdr *inner_iph = ip_hdr(skb);
-
-	if (INET_ECN_is_ce(outer_iph->tos))
-		IP_ECN_set_ce(inner_iph);
-}
-
 static int ipip_rcv(struct sk_buff *skb)
 {
 	struct ip_tunnel *tunnel;
 	const struct iphdr *iph = ip_hdr(skb);
+	int err;
 
 	tunnel = ipip_tunnel_lookup(dev_net(skb->dev), iph->saddr, iph->daddr);
 	if (tunnel != NULL) {
 		struct pcpu_tstats *tstats;
 
-		if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
-			kfree_skb(skb);
-			return 0;
-		}
+		if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
+			goto drop;
 
 		secpath_reset(skb);
 
@@ -430,21 +424,35 @@ static int ipip_rcv(struct sk_buff *skb)
 		skb->protocol = htons(ETH_P_IP);
 		skb->pkt_type = PACKET_HOST;
 
+		__skb_tunnel_rx(skb, tunnel->dev);
+
+		err = IP_ECN_decapsulate(iph, skb);
+		if (unlikely(err)) {
+			if (log_ecn_error)
+				net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
+						     &iph->saddr, iph->tos);
+			if (err > 1) {
+				++tunnel->dev->stats.rx_frame_errors;
+				++tunnel->dev->stats.rx_errors;
+				goto drop;
+			}
+		}
+
 		tstats = this_cpu_ptr(tunnel->dev->tstats);
 		u64_stats_update_begin(&tstats->syncp);
 		tstats->rx_packets++;
 		tstats->rx_bytes += skb->len;
 		u64_stats_update_end(&tstats->syncp);
 
-		__skb_tunnel_rx(skb, tunnel->dev);
-
-		ipip_ecn_decapsulate(iph, skb);
-
 		netif_rx(skb);
 		return 0;
 	}
 
 	return -1;
+
+drop:
+	kfree_skb(skb);
+	return 0;
 }
 
 /*
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index b987d4db790f..613a16647741 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -56,6 +56,10 @@
 #include <net/ip6_tunnel.h>
 
 
+static bool log_ecn_error = true;
+module_param(log_ecn_error, bool, 0644);
+MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
+
 #define IPV6_TCLASS_MASK (IPV6_FLOWINFO_MASK & ~IPV6_FLOWLABEL_MASK)
 #define IPV6_TCLASS_SHIFT 20
 
@@ -149,7 +153,9 @@ static struct rtnl_link_stats64 *ip6gre_get_stats64(struct net_device *dev,
 	tot->rx_crc_errors = dev->stats.rx_crc_errors;
 	tot->rx_fifo_errors = dev->stats.rx_fifo_errors;
 	tot->rx_length_errors = dev->stats.rx_length_errors;
+	tot->rx_frame_errors = dev->stats.rx_frame_errors;
 	tot->rx_errors = dev->stats.rx_errors;
+
 	tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
 	tot->tx_carrier_errors = dev->stats.tx_carrier_errors;
 	tot->tx_dropped = dev->stats.tx_dropped;
@@ -489,28 +495,6 @@ static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 	t->err_time = jiffies;
 }
 
-static inline void ip6gre_ecn_decapsulate_ipv4(const struct ip6_tnl *t,
-		const struct ipv6hdr *ipv6h, struct sk_buff *skb)
-{
-	__u8 dsfield = ipv6_get_dsfield(ipv6h) & ~INET_ECN_MASK;
-
-	if (t->parms.flags & IP6_TNL_F_RCV_DSCP_COPY)
-		ipv4_change_dsfield(ip_hdr(skb), INET_ECN_MASK, dsfield);
-
-	if (INET_ECN_is_ce(dsfield))
-		IP_ECN_set_ce(ip_hdr(skb));
-}
-
-static inline void ip6gre_ecn_decapsulate_ipv6(const struct ip6_tnl *t,
-		const struct ipv6hdr *ipv6h, struct sk_buff *skb)
-{
-	if (t->parms.flags & IP6_TNL_F_RCV_DSCP_COPY)
-		ipv6_copy_dscp(ipv6_get_dsfield(ipv6h), ipv6_hdr(skb));
-
-	if (INET_ECN_is_ce(ipv6_get_dsfield(ipv6h)))
-		IP6_ECN_set_ce(ipv6_hdr(skb));
-}
-
 static int ip6gre_rcv(struct sk_buff *skb)
 {
 	const struct ipv6hdr *ipv6h;
@@ -522,6 +506,7 @@ static int ip6gre_rcv(struct sk_buff *skb)
 	struct ip6_tnl *tunnel;
 	int    offset = 4;
 	__be16 gre_proto;
+	int err;
 
 	if (!pskb_may_pull(skb, sizeof(struct in6_addr)))
 		goto drop;
@@ -625,20 +610,29 @@ static int ip6gre_rcv(struct sk_buff *skb)
 			skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
 		}
 
+		__skb_tunnel_rx(skb, tunnel->dev);
+
+		skb_reset_network_header(skb);
+
+		err = IP6_ECN_decapsulate(ipv6h, skb);
+		if (unlikely(err)) {
+			if (log_ecn_error)
+				net_info_ratelimited("non-ECT from %pI6 with dsfield=%#x\n",
+						     &ipv6h->saddr,
+						     ipv6_get_dsfield(ipv6h));
+			if (err > 1) {
+				++tunnel->dev->stats.rx_frame_errors;
+				++tunnel->dev->stats.rx_errors;
+				goto drop;
+			}
+		}
+
 		tstats = this_cpu_ptr(tunnel->dev->tstats);
 		u64_stats_update_begin(&tstats->syncp);
 		tstats->rx_packets++;
 		tstats->rx_bytes += skb->len;
 		u64_stats_update_end(&tstats->syncp);
 
-		__skb_tunnel_rx(skb, tunnel->dev);
-
-		skb_reset_network_header(skb);
-		if (skb->protocol == htons(ETH_P_IP))
-			ip6gre_ecn_decapsulate_ipv4(tunnel, ipv6h, skb);
-		else if (skb->protocol == htons(ETH_P_IPV6))
-			ip6gre_ecn_decapsulate_ipv6(tunnel, ipv6h, skb);
-
 		netif_rx(skb);
 
 		return 0;
-- 
cgit v1.2.3


From e2bcabec6ea5ba30dd2097dc1566e9957d14117c Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 25 Sep 2012 11:32:13 +0000
Subject: net: remove sk_init() helper

It seems sk_init() has no value today and even does strange things :

# grep . /proc/sys/net/core/?mem_*
/proc/sys/net/core/rmem_default:212992
/proc/sys/net/core/rmem_max:131071
/proc/sys/net/core/wmem_default:212992
/proc/sys/net/core/wmem_max:131071

We can remove it completely.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Shan Wei <davidshan@tencent.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sock.h |  2 --
 net/core/sock.c    | 13 -------------
 net/socket.c       |  6 ------
 3 files changed, 21 deletions(-)

(limited to 'include')

diff --git a/include/net/sock.h b/include/net/sock.h
index f036493b9a61..bc476a19f28e 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -2229,8 +2229,6 @@ extern int net_msg_warn;
 extern __u32 sysctl_wmem_max;
 extern __u32 sysctl_rmem_max;
 
-extern void sk_init(void);
-
 extern int sysctl_optmem_max;
 
 extern __u32 sysctl_wmem_default;
diff --git a/net/core/sock.c b/net/core/sock.c
index 727114cd6f7e..f5a426097236 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1464,19 +1464,6 @@ void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
 }
 EXPORT_SYMBOL_GPL(sk_setup_caps);
 
-void __init sk_init(void)
-{
-	if (totalram_pages <= 4096) {
-		sysctl_wmem_max = 32767;
-		sysctl_rmem_max = 32767;
-		sysctl_wmem_default = 32767;
-		sysctl_rmem_default = 32767;
-	} else if (totalram_pages >= 131072) {
-		sysctl_wmem_max = 131071;
-		sysctl_rmem_max = 131071;
-	}
-}
-
 /*
  *	Simple resource managers for sockets.
  */
diff --git a/net/socket.c b/net/socket.c
index c641549a13e2..80dc7e84b046 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -2600,12 +2600,6 @@ static int __init sock_init(void)
 	if (err)
 		goto out;
 
-	/*
-	 *      Initialize sock SLAB cache.
-	 */
-
-	sk_init();
-
 	/*
 	 *      Initialize skbuff SLAB cache
 	 */
-- 
cgit v1.2.3


From 404f7c9e118e0c92902afe1853d35f5638fe4a4c Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 26 Sep 2012 07:07:47 +0000
Subject: net: struct napi_struct fields reordering

Remove two holes on 64bit arches, and put dev_list at the end of
napi_struct since its not used in fast path.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 6c131f055ab0..dd320bb22a5a 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -338,18 +338,16 @@ struct napi_struct {
 
 	unsigned long		state;
 	int			weight;
+	unsigned int		gro_count;
 	int			(*poll)(struct napi_struct *, int);
 #ifdef CONFIG_NETPOLL
 	spinlock_t		poll_lock;
 	int			poll_owner;
 #endif
-
-	unsigned int		gro_count;
-
 	struct net_device	*dev;
-	struct list_head	dev_list;
 	struct sk_buff		*gro_list;
 	struct sk_buff		*skb;
+	struct list_head	dev_list;
 };
 
 enum {
-- 
cgit v1.2.3


From c9e6bc644e557338221e75c242ab12c275a67d1b Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 27 Sep 2012 19:29:05 +0000
Subject: net: add gro_cells infrastructure

This adds a new include file (include/net/gro_cells.h), to bring GRO
(Generic Receive Offload) capability to tunnels, in a modular way.

Because tunnels receive path is lockless, and GRO adds a serialization
using a napi_struct, I chose to add an array of up to
DEFAULT_MAX_NUM_RSS_QUEUES cells, so that multi queue devices wont be
slowed down because of GRO layer.

skb_get_rx_queue() is used as selector.

In the future, we might add optional fanout capabilities, using rxhash
for example.

With help from Ben Hutchings who reminded me
netif_get_num_default_rss_queues() function.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Ben Hutchings <bhutchings@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/gro_cells.h | 103 ++++++++++++++++++++++++++++++++++++++++++++++++
 net/core/dev.c          |   2 +
 2 files changed, 105 insertions(+)
 create mode 100644 include/net/gro_cells.h

(limited to 'include')

diff --git a/include/net/gro_cells.h b/include/net/gro_cells.h
new file mode 100644
index 000000000000..4fd8a4b4b7ee
--- /dev/null
+++ b/include/net/gro_cells.h
@@ -0,0 +1,103 @@
+#ifndef _NET_GRO_CELLS_H
+#define _NET_GRO_CELLS_H
+
+#include <linux/skbuff.h>
+#include <linux/slab.h>
+#include <linux/netdevice.h>
+
+struct gro_cell {
+	struct sk_buff_head	napi_skbs;
+	struct napi_struct	napi;
+} ____cacheline_aligned_in_smp;
+
+struct gro_cells {
+	unsigned int		gro_cells_mask;
+	struct gro_cell		*cells;
+};
+
+static inline void gro_cells_receive(struct gro_cells *gcells, struct sk_buff *skb)
+{
+	unsigned long flags;
+	struct gro_cell *cell = gcells->cells;
+	struct net_device *dev = skb->dev;
+
+	if (!cell || skb_cloned(skb) || !(dev->features & NETIF_F_GRO)) {
+		netif_rx(skb);
+		return;
+	}
+
+	if (skb_rx_queue_recorded(skb))
+		cell += skb_get_rx_queue(skb) & gcells->gro_cells_mask;
+
+	if (skb_queue_len(&cell->napi_skbs) > netdev_max_backlog) {
+		atomic_long_inc(&dev->rx_dropped);
+		kfree_skb(skb);
+		return;
+	}
+
+	spin_lock_irqsave(&cell->napi_skbs.lock, flags);
+
+	__skb_queue_tail(&cell->napi_skbs, skb);
+	if (skb_queue_len(&cell->napi_skbs) == 1)
+		napi_schedule(&cell->napi);
+
+	spin_unlock_irqrestore(&cell->napi_skbs.lock, flags);
+}
+
+static inline int gro_cell_poll(struct napi_struct *napi, int budget)
+{
+	struct gro_cell *cell = container_of(napi, struct gro_cell, napi);
+	struct sk_buff *skb;
+	int work_done = 0;
+
+	while (work_done < budget) {
+		skb = skb_dequeue(&cell->napi_skbs);
+		if (!skb)
+			break;
+
+		napi_gro_receive(napi, skb);
+		work_done++;
+	}
+
+	if (work_done < budget)
+		napi_complete(napi);
+	return work_done;
+}
+
+static inline int gro_cells_init(struct gro_cells *gcells, struct net_device *dev)
+{
+	int i;
+
+	gcells->gro_cells_mask = roundup_pow_of_two(netif_get_num_default_rss_queues()) - 1;
+	gcells->cells = kcalloc(sizeof(struct gro_cell),
+				gcells->gro_cells_mask + 1,
+				GFP_KERNEL);
+	if (!gcells->cells)
+		return -ENOMEM;
+
+	for (i = 0; i <= gcells->gro_cells_mask; i++) {
+		struct gro_cell *cell = gcells->cells + i;
+
+		skb_queue_head_init(&cell->napi_skbs);
+		netif_napi_add(dev, &cell->napi, gro_cell_poll, 64);
+		napi_enable(&cell->napi);
+	}
+	return 0;
+}
+
+static inline void gro_cells_destroy(struct gro_cells *gcells)
+{
+	struct gro_cell *cell = gcells->cells;
+	int i;
+
+	if (!cell)
+		return;
+	for (i = 0; i <= gcells->gro_cells_mask; i++,cell++) {
+		netif_napi_del(&cell->napi);
+		skb_queue_purge(&cell->napi_skbs);
+	}
+	kfree(gcells->cells);
+	gcells->cells = NULL;
+}
+
+#endif
diff --git a/net/core/dev.c b/net/core/dev.c
index 3e645f3751bf..ba4bb118a756 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2645,6 +2645,8 @@ EXPORT_SYMBOL(dev_queue_xmit);
   =======================================================================*/
 
 int netdev_max_backlog __read_mostly = 1000;
+EXPORT_SYMBOL(netdev_max_backlog);
+
 int netdev_tstamp_prequeue __read_mostly = 1;
 int netdev_budget __read_mostly = 300;
 int weight_p __read_mostly = 64;            /* old backlog weight */
-- 
cgit v1.2.3


From 60769a5dcd8755715c7143b4571d5c44f01796f1 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 27 Sep 2012 02:48:50 +0000
Subject: ipv4: gre: add GRO capability

Add GRO capability to IPv4 GRE tunnels, using the gro_cells
infrastructure.

Tested using IPv4 and IPv6 TCP traffic inside this tunnel, and
checking GRO is building large packets.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ipip.h |  3 +++
 net/ipv4/ip_gre.c  | 13 +++++++++++--
 2 files changed, 14 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/net/ipip.h b/include/net/ipip.h
index a93cf6d7e94b..ddc077c51f32 100644
--- a/include/net/ipip.h
+++ b/include/net/ipip.h
@@ -2,6 +2,7 @@
 #define __NET_IPIP_H 1
 
 #include <linux/if_tunnel.h>
+#include <net/gro_cells.h>
 #include <net/ip.h>
 
 /* Keep error state on tunnel for 30 sec */
@@ -36,6 +37,8 @@ struct ip_tunnel {
 #endif
 	struct ip_tunnel_prl_entry __rcu *prl;		/* potential router list */
 	unsigned int			prl_count;	/* # of entries in PRL */
+
+	struct gro_cells		gro_cells;
 };
 
 struct ip_tunnel_prl_entry {
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index ef0b861ce044..b1871d993d22 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -740,8 +740,7 @@ static int ipgre_rcv(struct sk_buff *skb)
 		tstats->rx_bytes += skb->len;
 		u64_stats_update_end(&tstats->syncp);
 
-		netif_rx(skb);
-
+		gro_cells_receive(&tunnel->gro_cells, skb);
 		return 0;
 	}
 	icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
@@ -1319,6 +1318,9 @@ static const struct net_device_ops ipgre_netdev_ops = {
 
 static void ipgre_dev_free(struct net_device *dev)
 {
+	struct ip_tunnel *tunnel = netdev_priv(dev);
+
+	gro_cells_destroy(&tunnel->gro_cells);
 	free_percpu(dev->tstats);
 	free_netdev(dev);
 }
@@ -1350,6 +1352,7 @@ static int ipgre_tunnel_init(struct net_device *dev)
 {
 	struct ip_tunnel *tunnel;
 	struct iphdr *iph;
+	int err;
 
 	tunnel = netdev_priv(dev);
 	iph = &tunnel->parms.iph;
@@ -1376,6 +1379,12 @@ static int ipgre_tunnel_init(struct net_device *dev)
 	if (!dev->tstats)
 		return -ENOMEM;
 
+	err = gro_cells_init(&tunnel->gro_cells, dev);
+	if (err) {
+		free_percpu(dev->tstats);
+		return err;
+	}
+
 	return 0;
 }
 
-- 
cgit v1.2.3


From 862096a8bbf8f992f6d0a1a8786ffd3fc7437e48 Mon Sep 17 00:00:00 2001
From: Or Gerlitz <ogerlitz@mellanox.com>
Date: Thu, 27 Sep 2012 12:06:02 +0000
Subject: IB/ipoib: Add more rtnl_link_ops callbacks

Add the rtnl_link_ops changelink and fill_info callbacks, through
which the admin can now set/get the driver mode, etc policies.
Maintain the proprietary sysfs entries only for legacy childs.

For child devices, set dev->iflink to point to the parent
device ifindex, such that user space tools can now correctly
show the uplink relation as done for vlan, macvlan, etc
devices. Pointed out by Patrick McHardy <kaber@trash.net>

Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/infiniband/ulp/ipoib/ipoib.h         |  3 ++
 drivers/infiniband/ulp/ipoib/ipoib_cm.c      | 34 +++++++++++-----
 drivers/infiniband/ulp/ipoib/ipoib_main.c    | 16 +++++---
 drivers/infiniband/ulp/ipoib/ipoib_netlink.c | 60 +++++++++++++++++++++++++++-
 drivers/infiniband/ulp/ipoib/ipoib_vlan.c    | 24 ++++++-----
 include/linux/if_link.h                      |  7 ++++
 6 files changed, 118 insertions(+), 26 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index ac48f86f2384..196eb52f0035 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -523,6 +523,9 @@ int __ipoib_vlan_add(struct ipoib_dev_priv *ppriv, struct ipoib_dev_priv *priv,
 int  __init ipoib_netlink_init(void);
 void __exit ipoib_netlink_fini(void);
 
+void ipoib_set_umcast(struct net_device *ndev, int umcast_val);
+int  ipoib_set_mode(struct net_device *dev, const char *buf);
+
 void ipoib_setup(struct net_device *dev);
 
 void ipoib_pkey_poll(struct work_struct *work);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index 24683fda8e21..175581cf478c 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -1448,15 +1448,10 @@ static ssize_t show_mode(struct device *d, struct device_attribute *attr,
 		return sprintf(buf, "datagram\n");
 }
 
-static ssize_t set_mode(struct device *d, struct device_attribute *attr,
-			const char *buf, size_t count)
+int ipoib_set_mode(struct net_device *dev, const char *buf)
 {
-	struct net_device *dev = to_net_dev(d);
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 
-	if (!rtnl_trylock())
-		return restart_syscall();
-
 	/* flush paths if we switch modes so that connections are restarted */
 	if (IPOIB_CM_SUPPORTED(dev->dev_addr) && !strcmp(buf, "connected\n")) {
 		set_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags);
@@ -1467,7 +1462,8 @@ static ssize_t set_mode(struct device *d, struct device_attribute *attr,
 		priv->tx_wr.send_flags &= ~IB_SEND_IP_CSUM;
 
 		ipoib_flush_paths(dev);
-		return count;
+		rtnl_lock();
+		return 0;
 	}
 
 	if (!strcmp(buf, "datagram\n")) {
@@ -1476,14 +1472,32 @@ static ssize_t set_mode(struct device *d, struct device_attribute *attr,
 		dev_set_mtu(dev, min(priv->mcast_mtu, dev->mtu));
 		rtnl_unlock();
 		ipoib_flush_paths(dev);
-
-		return count;
+		rtnl_lock();
+		return 0;
 	}
-	rtnl_unlock();
 
 	return -EINVAL;
 }
 
+static ssize_t set_mode(struct device *d, struct device_attribute *attr,
+			const char *buf, size_t count)
+{
+	struct net_device *dev = to_net_dev(d);
+	int ret;
+
+	if (!rtnl_trylock())
+		return restart_syscall();
+
+	ret = ipoib_set_mode(dev, buf);
+
+	rtnl_unlock();
+
+	if (!ret)
+		return count;
+
+	return ret;
+}
+
 static DEVICE_ATTR(mode, S_IWUSR | S_IRUGO, show_mode, set_mode);
 
 int ipoib_cm_add_mode_attr(struct net_device *dev)
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 128fab102054..3f9a9ba2f9ec 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -1381,12 +1381,9 @@ static ssize_t show_umcast(struct device *dev,
 	return sprintf(buf, "%d\n", test_bit(IPOIB_FLAG_UMCAST, &priv->flags));
 }
 
-static ssize_t set_umcast(struct device *dev,
-			  struct device_attribute *attr,
-			  const char *buf, size_t count)
+void ipoib_set_umcast(struct net_device *ndev, int umcast_val)
 {
-	struct ipoib_dev_priv *priv = netdev_priv(to_net_dev(dev));
-	unsigned long umcast_val = simple_strtoul(buf, NULL, 0);
+	struct ipoib_dev_priv *priv = netdev_priv(ndev);
 
 	if (umcast_val > 0) {
 		set_bit(IPOIB_FLAG_UMCAST, &priv->flags);
@@ -1394,6 +1391,15 @@ static ssize_t set_umcast(struct device *dev,
 				"by userspace\n");
 	} else
 		clear_bit(IPOIB_FLAG_UMCAST, &priv->flags);
+}
+
+static ssize_t set_umcast(struct device *dev,
+			  struct device_attribute *attr,
+			  const char *buf, size_t count)
+{
+	unsigned long umcast_val = simple_strtoul(buf, NULL, 0);
+
+	ipoib_set_umcast(to_net_dev(dev), umcast_val);
 
 	return count;
 }
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_netlink.c b/drivers/infiniband/ulp/ipoib/ipoib_netlink.c
index a7dc5ea8370e..74685936c948 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_netlink.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_netlink.c
@@ -37,8 +37,60 @@
 
 static const struct nla_policy ipoib_policy[IFLA_IPOIB_MAX + 1] = {
 	[IFLA_IPOIB_PKEY]	= { .type = NLA_U16 },
+	[IFLA_IPOIB_MODE]	= { .type = NLA_U16 },
+	[IFLA_IPOIB_UMCAST]	= { .type = NLA_U16 },
 };
 
+static int ipoib_fill_info(struct sk_buff *skb, const struct net_device *dev)
+{
+	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	u16 val;
+
+	if (nla_put_u16(skb, IFLA_IPOIB_PKEY, priv->pkey))
+		goto nla_put_failure;
+
+	val = test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags);
+	if (nla_put_u16(skb, IFLA_IPOIB_MODE, val))
+		goto nla_put_failure;
+
+	val = test_bit(IPOIB_FLAG_UMCAST, &priv->flags);
+	if (nla_put_u16(skb, IFLA_IPOIB_UMCAST, val))
+		goto nla_put_failure;
+
+	return 0;
+
+nla_put_failure:
+	return -EMSGSIZE;
+}
+
+static int ipoib_changelink(struct net_device *dev,
+			    struct nlattr *tb[], struct nlattr *data[])
+{
+	u16 mode, umcast;
+	int ret = 0;
+
+	if (data[IFLA_IPOIB_MODE]) {
+		mode  = nla_get_u16(data[IFLA_IPOIB_MODE]);
+		if (mode == IPOIB_MODE_DATAGRAM)
+			ret = ipoib_set_mode(dev, "datagram\n");
+		else if (mode == IPOIB_MODE_CONNECTED)
+			ret = ipoib_set_mode(dev, "connected\n");
+		else
+			ret = -EINVAL;
+
+		if (ret < 0)
+			goto out_err;
+	}
+
+	if (data[IFLA_IPOIB_UMCAST]) {
+		umcast = nla_get_u16(data[IFLA_IPOIB_UMCAST]);
+		ipoib_set_umcast(dev, umcast);
+	}
+
+out_err:
+	return ret;
+}
+
 static int ipoib_new_child_link(struct net *src_net, struct net_device *dev,
 			       struct nlattr *tb[], struct nlattr *data[])
 {
@@ -69,6 +121,8 @@ static int ipoib_new_child_link(struct net *src_net, struct net_device *dev,
 
 	err = __ipoib_vlan_add(ppriv, netdev_priv(dev), child_pkey, IPOIB_RTNL_CHILD);
 
+	if (!err && data)
+		err = ipoib_changelink(dev, tb, data);
 	return err;
 }
 
@@ -87,7 +141,9 @@ static void ipoib_unregister_child_dev(struct net_device *dev, struct list_head
 
 static size_t ipoib_get_size(const struct net_device *dev)
 {
-	return nla_total_size(2);	/* IFLA_IPOIB_PKEY */
+	return nla_total_size(2) +	/* IFLA_IPOIB_PKEY   */
+		nla_total_size(2) +	/* IFLA_IPOIB_MODE   */
+		nla_total_size(2);	/* IFLA_IPOIB_UMCAST */
 }
 
 static struct rtnl_link_ops ipoib_link_ops __read_mostly = {
@@ -97,8 +153,10 @@ static struct rtnl_link_ops ipoib_link_ops __read_mostly = {
 	.priv_size	= sizeof(struct ipoib_dev_priv),
 	.setup		= ipoib_setup,
 	.newlink	= ipoib_new_child_link,
+	.changelink	= ipoib_changelink,
 	.dellink	= ipoib_unregister_child_dev,
 	.get_size	= ipoib_get_size,
+	.fill_info	= ipoib_fill_info,
 };
 
 int __init ipoib_netlink_init(void)
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
index 238bbf9b2bea..8292554bccb5 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
@@ -88,17 +88,21 @@ int __ipoib_vlan_add(struct ipoib_dev_priv *ppriv, struct ipoib_dev_priv *priv,
 
 	ipoib_create_debug_files(priv->dev);
 
-	if (ipoib_cm_add_mode_attr(priv->dev))
-		goto sysfs_failed;
-	if (ipoib_add_pkey_attr(priv->dev))
-		goto sysfs_failed;
-	if (ipoib_add_umcast_attr(priv->dev))
-		goto sysfs_failed;
-
-	if (device_create_file(&priv->dev->dev, &dev_attr_parent))
-		goto sysfs_failed;
+	/* RTNL childs don't need proprietary sysfs entries */
+	if (type == IPOIB_LEGACY_CHILD) {
+		if (ipoib_cm_add_mode_attr(priv->dev))
+			goto sysfs_failed;
+		if (ipoib_add_pkey_attr(priv->dev))
+			goto sysfs_failed;
+		if (ipoib_add_umcast_attr(priv->dev))
+			goto sysfs_failed;
+
+		if (device_create_file(&priv->dev->dev, &dev_attr_parent))
+			goto sysfs_failed;
+	}
 
-	priv->child_type = type;
+	priv->child_type  = type;
+	priv->dev->iflink = ppriv->dev->ifindex;
 	list_add_tail(&priv->list, &ppriv->child_intfs);
 
 	return 0;
diff --git a/include/linux/if_link.h b/include/linux/if_link.h
index 24c0dd09af54..4491177e984d 100644
--- a/include/linux/if_link.h
+++ b/include/linux/if_link.h
@@ -404,9 +404,16 @@ struct ifla_port_vsi {
 enum {
 	IFLA_IPOIB_UNSPEC,
 	IFLA_IPOIB_PKEY,
+	IFLA_IPOIB_MODE,
+	IFLA_IPOIB_UMCAST,
 	__IFLA_IPOIB_MAX
 };
 
+enum {
+	IPOIB_MODE_DATAGRAM  = 0, /* using unreliable datagram QPs */
+	IPOIB_MODE_CONNECTED = 1, /* using connected QPs */
+};
+
 #define IFLA_IPOIB_MAX (__IFLA_IPOIB_MAX - 1)
 
 #endif /* _LINUX_IF_LINK_H */
-- 
cgit v1.2.3


From edc7d57327bd08bfd04f41531d49b176369db218 Mon Sep 17 00:00:00 2001
From: stephen hemminger <shemminger@vyatta.com>
Date: Mon, 1 Oct 2012 12:32:33 +0000
Subject: netlink: add attributes to fdb interface

Later changes need to be able to refer to neighbour attributes
when doing fdb_add.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 2 +-
 drivers/net/macvlan.c                         | 2 +-
 include/linux/netdevice.h                     | 4 +++-
 net/bridge/br_fdb.c                           | 3 ++-
 net/bridge/br_private.h                       | 2 +-
 net/core/rtnetlink.c                          | 6 ++++--
 6 files changed, 12 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 29465be2a14a..0ba6d9561bdb 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -6889,7 +6889,7 @@ static int ixgbe_set_features(struct net_device *netdev,
 	return 0;
 }
 
-static int ixgbe_ndo_fdb_add(struct ndmsg *ndm,
+static int ixgbe_ndo_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
 			     struct net_device *dev,
 			     const unsigned char *addr,
 			     u16 flags)
diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index 815dfcfbc7b9..68a43fe602e7 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -546,7 +546,7 @@ static int macvlan_vlan_rx_kill_vid(struct net_device *dev,
 	return 0;
 }
 
-static int macvlan_fdb_add(struct ndmsg *ndm,
+static int macvlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
 			   struct net_device *dev,
 			   const unsigned char *addr,
 			   u16 flags)
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index dd320bb22a5a..807a610f193c 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -904,7 +904,8 @@ struct netdev_fcoe_hbainfo {
  *	feature set might be less than what was returned by ndo_fix_features()).
  *	Must return >0 or -errno if it changed dev->features itself.
  *
- * int (*ndo_fdb_add)(struct ndmsg *ndm, struct net_device *dev,
+ * int (*ndo_fdb_add)(struct ndmsg *ndm, struct nlattr *tb[],
+ *		      struct net_device *dev,
  *		      const unsigned char *addr, u16 flags)
  *	Adds an FDB entry to dev for addr.
  * int (*ndo_fdb_del)(struct ndmsg *ndm, struct net_device *dev,
@@ -1014,6 +1015,7 @@ struct net_device_ops {
 	void			(*ndo_neigh_destroy)(struct neighbour *n);
 
 	int			(*ndo_fdb_add)(struct ndmsg *ndm,
+					       struct nlattr *tb[],
 					       struct net_device *dev,
 					       const unsigned char *addr,
 					       u16 flags);
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index 02861190a3d4..d9576e6de2b8 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -608,7 +608,8 @@ static int fdb_add_entry(struct net_bridge_port *source, const __u8 *addr,
 }
 
 /* Add new permanent fdb entry with RTM_NEWNEIGH */
-int br_fdb_add(struct ndmsg *ndm, struct net_device *dev,
+int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
+	       struct net_device *dev,
 	       const unsigned char *addr, u16 nlh_flags)
 {
 	struct net_bridge_port *p;
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 11a984b87e62..9b278c4ebee1 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -364,7 +364,7 @@ extern void br_fdb_update(struct net_bridge *br,
 extern int br_fdb_delete(struct ndmsg *ndm,
 			 struct net_device *dev,
 			 const unsigned char *addr);
-extern int br_fdb_add(struct ndmsg *nlh,
+extern int br_fdb_add(struct ndmsg *nlh, struct nlattr *tb[],
 		      struct net_device *dev,
 		      const unsigned char *addr,
 		      u16 nlh_flags);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 92575370d9f0..76d4c2c3c89b 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -2090,7 +2090,8 @@ static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 	if ((!ndm->ndm_flags || ndm->ndm_flags & NTF_MASTER) &&
 	    (dev->priv_flags & IFF_BRIDGE_PORT)) {
 		master = dev->master;
-		err = master->netdev_ops->ndo_fdb_add(ndm, dev, addr,
+		err = master->netdev_ops->ndo_fdb_add(ndm, tb,
+						      dev, addr,
 						      nlh->nlmsg_flags);
 		if (err)
 			goto out;
@@ -2100,7 +2101,8 @@ static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 
 	/* Embedded bridge, macvlan, and any other device support */
 	if ((ndm->ndm_flags & NTF_SELF) && dev->netdev_ops->ndo_fdb_add) {
-		err = dev->netdev_ops->ndo_fdb_add(ndm, dev, addr,
+		err = dev->netdev_ops->ndo_fdb_add(ndm, tb,
+						   dev, addr,
 						   nlh->nlmsg_flags);
 
 		if (!err) {
-- 
cgit v1.2.3


From d342894c5d2f8c7df194c793ec4059656e09ca31 Mon Sep 17 00:00:00 2001
From: stephen hemminger <shemminger@vyatta.com>
Date: Mon, 1 Oct 2012 12:32:35 +0000
Subject: vxlan: virtual extensible lan

This is an implementation of Virtual eXtensible Local Area Network
as described in draft RFC:
  http://tools.ietf.org/html/draft-mahalingam-dutt-dcops-vxlan-02

The driver integrates a Virtual Tunnel Endpoint (VTEP) functionality
that learns MAC to IP address mapping.

This implementation has not been tested only against the Linux
userspace implementation using TAP, not against other vendor's
equipment.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/vxlan.txt |   47 ++
 drivers/net/Kconfig                |   13 +
 drivers/net/Makefile               |    1 +
 drivers/net/vxlan.c                | 1217 ++++++++++++++++++++++++++++++++++++
 include/linux/if_link.h            |   16 +
 5 files changed, 1294 insertions(+)
 create mode 100644 Documentation/networking/vxlan.txt
 create mode 100644 drivers/net/vxlan.c

(limited to 'include')

diff --git a/Documentation/networking/vxlan.txt b/Documentation/networking/vxlan.txt
new file mode 100644
index 000000000000..5b34b762d7d5
--- /dev/null
+++ b/Documentation/networking/vxlan.txt
@@ -0,0 +1,47 @@
+Virtual eXtensible Local Area Networking documentation
+======================================================
+
+The VXLAN protocol is a tunnelling protocol that is designed to
+solve the problem of limited number of available VLAN's (4096).
+With VXLAN identifier is expanded to 24 bits.
+
+It is a draft RFC standard, that is implemented by Cisco Nexus,
+Vmware and Brocade. The protocol runs over UDP using a single
+destination port (still not standardized by IANA).
+This document describes the Linux kernel tunnel device,
+there is also an implantation of VXLAN for Openvswitch.
+
+Unlike most tunnels, a VXLAN is a 1 to N network, not just point
+to point. A VXLAN device can either dynamically learn the IP address
+of the other end, in a manner similar to a learning bridge, or the
+forwarding entries can be configured statically.
+
+The management of vxlan is done in a similar fashion to it's
+too closest neighbors GRE and VLAN. Configuring VXLAN requires
+the version of iproute2 that matches the kernel release
+where VXLAN was first merged upstream.
+
+1. Create vxlan device
+  # ip li add vxlan0 type vxlan id 42 group 239.1.1.1 dev eth1
+
+This creates a new device (vxlan0). The device uses the
+the multicast group 239.1.1.1 over eth1 to handle packets where
+no entry is in the forwarding table.
+
+2. Delete vxlan device
+  # ip link delete vxlan0
+
+3. Show vxlan info
+  # ip -d show vxlan0
+
+It is possible to create, destroy and display the vxlan
+forwarding table using the new bridge command.
+
+1. Create forwarding table entry
+  # bridge fdb add to 00:17:42:8a:b4:05 dst 192.19.0.2 dev vxlan0
+
+2. Delete forwarding table entry
+  # bridge fdb delete 00:17:42:8a:b4:05
+
+3. Show forwarding table
+  # bridge fdb show dev vxlan0
diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index 707ab7bd4ea5..ed5041e96b2d 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -149,6 +149,19 @@ config MACVTAP
 	  To compile this driver as a module, choose M here: the module
 	  will be called macvtap.
 
+config VXLAN
+       tristate "Virtual eXtensible Local Area Network (VXLAN)"
+       depends on EXPERIMENTAL
+       ---help---
+	  This allows one to create vxlan virtual interfaces that provide
+	  Layer 2 Networks over Layer 3 Networks. VXLAN is often used
+	  to tunnel virtual network infrastructure in virtualized environments.
+	  For more information see:
+	    http://tools.ietf.org/html/draft-mahalingam-dutt-dcops-vxlan-02
+
+	  To compile this driver as a module, choose M here: the module
+	  will be called vxlan.
+
 config NETCONSOLE
 	tristate "Network console logging support"
 	---help---
diff --git a/drivers/net/Makefile b/drivers/net/Makefile
index b682a1de7be8..335db78fd987 100644
--- a/drivers/net/Makefile
+++ b/drivers/net/Makefile
@@ -21,6 +21,7 @@ obj-$(CONFIG_NET_TEAM) += team/
 obj-$(CONFIG_TUN) += tun.o
 obj-$(CONFIG_VETH) += veth.o
 obj-$(CONFIG_VIRTIO_NET) += virtio_net.o
+obj-$(CONFIG_VXLAN) += vxlan.o
 
 #
 # Networking Drivers
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
new file mode 100644
index 000000000000..f87a98f1aec2
--- /dev/null
+++ b/drivers/net/vxlan.c
@@ -0,0 +1,1217 @@
+/*
+ * VXLAN: Virtual eXtensiable Local Area Network
+ *
+ * Copyright (c) 2012 Vyatta Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * TODO
+ *  - use IANA UDP port number (when defined)
+ *  - IPv6 (not in RFC)
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/slab.h>
+#include <linux/skbuff.h>
+#include <linux/rculist.h>
+#include <linux/netdevice.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/udp.h>
+#include <linux/igmp.h>
+#include <linux/etherdevice.h>
+#include <linux/if_ether.h>
+#include <linux/version.h>
+#include <linux/hash.h>
+#include <net/ip.h>
+#include <net/icmp.h>
+#include <net/udp.h>
+#include <net/rtnetlink.h>
+#include <net/route.h>
+#include <net/dsfield.h>
+#include <net/inet_ecn.h>
+#include <net/net_namespace.h>
+#include <net/netns/generic.h>
+
+#define VXLAN_VERSION	"0.1"
+
+#define VNI_HASH_BITS	10
+#define VNI_HASH_SIZE	(1<<VNI_HASH_BITS)
+#define FDB_HASH_BITS	8
+#define FDB_HASH_SIZE	(1<<FDB_HASH_BITS)
+#define FDB_AGE_DEFAULT 300 /* 5 min */
+#define FDB_AGE_INTERVAL (10 * HZ)	/* rescan interval */
+
+#define VXLAN_N_VID	(1u << 24)
+#define VXLAN_VID_MASK	(VXLAN_N_VID - 1)
+/* VLAN + IP header + UDP + VXLAN */
+#define VXLAN_HEADROOM (4 + 20 + 8 + 8)
+
+#define VXLAN_FLAGS 0x08000000	/* struct vxlanhdr.vx_flags required value. */
+
+/* VXLAN protocol header */
+struct vxlanhdr {
+	__be32 vx_flags;
+	__be32 vx_vni;
+};
+
+/* UDP port for VXLAN traffic. */
+static unsigned int vxlan_port __read_mostly = 8472;
+module_param_named(udp_port, vxlan_port, uint, 0444);
+MODULE_PARM_DESC(udp_port, "Destination UDP port");
+
+static bool log_ecn_error = true;
+module_param(log_ecn_error, bool, 0644);
+MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
+
+/* per-net private data for this module */
+static unsigned int vxlan_net_id;
+struct vxlan_net {
+	struct socket	  *sock;	/* UDP encap socket */
+	struct hlist_head vni_list[VNI_HASH_SIZE];
+};
+
+/* Forwarding table entry */
+struct vxlan_fdb {
+	struct hlist_node hlist;	/* linked list of entries */
+	struct rcu_head	  rcu;
+	unsigned long	  updated;	/* jiffies */
+	unsigned long	  used;
+	__be32		  remote_ip;
+	u16		  state;	/* see ndm_state */
+	u8		  eth_addr[ETH_ALEN];
+};
+
+/* Per-cpu network traffic stats */
+struct vxlan_stats {
+	u64			rx_packets;
+	u64			rx_bytes;
+	u64			tx_packets;
+	u64			tx_bytes;
+	struct u64_stats_sync	syncp;
+};
+
+/* Pseudo network device */
+struct vxlan_dev {
+	struct hlist_node hlist;
+	struct net_device *dev;
+	struct vxlan_stats __percpu *stats;
+	__u32		  vni;		/* virtual network id */
+	__be32	          gaddr;	/* multicast group */
+	__be32		  saddr;	/* source address */
+	unsigned int      link;		/* link to multicast over */
+	__u8		  tos;		/* TOS override */
+	__u8		  ttl;
+	bool		  learn;
+
+	unsigned long	  age_interval;
+	struct timer_list age_timer;
+	spinlock_t	  hash_lock;
+	unsigned int	  addrcnt;
+	unsigned int	  addrmax;
+	unsigned int	  addrexceeded;
+
+	struct hlist_head fdb_head[FDB_HASH_SIZE];
+};
+
+/* salt for hash table */
+static u32 vxlan_salt __read_mostly;
+
+static inline struct hlist_head *vni_head(struct net *net, u32 id)
+{
+	struct vxlan_net *vn = net_generic(net, vxlan_net_id);
+
+	return &vn->vni_list[hash_32(id, VNI_HASH_BITS)];
+}
+
+/* Look up VNI in a per net namespace table */
+static struct vxlan_dev *vxlan_find_vni(struct net *net, u32 id)
+{
+	struct vxlan_dev *vxlan;
+	struct hlist_node *node;
+
+	hlist_for_each_entry_rcu(vxlan, node, vni_head(net, id), hlist) {
+		if (vxlan->vni == id)
+			return vxlan;
+	}
+
+	return NULL;
+}
+
+/* Fill in neighbour message in skbuff. */
+static int vxlan_fdb_info(struct sk_buff *skb, struct vxlan_dev *vxlan,
+			   const struct vxlan_fdb *fdb,
+			   u32 portid, u32 seq, int type, unsigned int flags)
+{
+	unsigned long now = jiffies;
+	struct nda_cacheinfo ci;
+	struct nlmsghdr *nlh;
+	struct ndmsg *ndm;
+
+	nlh = nlmsg_put(skb, portid, seq, type, sizeof(*ndm), flags);
+	if (nlh == NULL)
+		return -EMSGSIZE;
+
+	ndm = nlmsg_data(nlh);
+	memset(ndm, 0, sizeof(*ndm));
+	ndm->ndm_family	= AF_BRIDGE;
+	ndm->ndm_state = fdb->state;
+	ndm->ndm_ifindex = vxlan->dev->ifindex;
+	ndm->ndm_flags = NTF_SELF;
+	ndm->ndm_type = NDA_DST;
+
+	if (nla_put(skb, NDA_LLADDR, ETH_ALEN, &fdb->eth_addr))
+		goto nla_put_failure;
+
+	if (nla_put_be32(skb, NDA_DST, fdb->remote_ip))
+		goto nla_put_failure;
+
+	ci.ndm_used	 = jiffies_to_clock_t(now - fdb->used);
+	ci.ndm_confirmed = 0;
+	ci.ndm_updated	 = jiffies_to_clock_t(now - fdb->updated);
+	ci.ndm_refcnt	 = 0;
+
+	if (nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci))
+		goto nla_put_failure;
+
+	return nlmsg_end(skb, nlh);
+
+nla_put_failure:
+	nlmsg_cancel(skb, nlh);
+	return -EMSGSIZE;
+}
+
+static inline size_t vxlan_nlmsg_size(void)
+{
+	return NLMSG_ALIGN(sizeof(struct ndmsg))
+		+ nla_total_size(ETH_ALEN) /* NDA_LLADDR */
+		+ nla_total_size(sizeof(__be32)) /* NDA_DST */
+		+ nla_total_size(sizeof(struct nda_cacheinfo));
+}
+
+static void vxlan_fdb_notify(struct vxlan_dev *vxlan,
+			     const struct vxlan_fdb *fdb, int type)
+{
+	struct net *net = dev_net(vxlan->dev);
+	struct sk_buff *skb;
+	int err = -ENOBUFS;
+
+	skb = nlmsg_new(vxlan_nlmsg_size(), GFP_ATOMIC);
+	if (skb == NULL)
+		goto errout;
+
+	err = vxlan_fdb_info(skb, vxlan, fdb, 0, 0, type, 0);
+	if (err < 0) {
+		/* -EMSGSIZE implies BUG in vxlan_nlmsg_size() */
+		WARN_ON(err == -EMSGSIZE);
+		kfree_skb(skb);
+		goto errout;
+	}
+
+	rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
+	return;
+errout:
+	if (err < 0)
+		rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
+}
+
+/* Hash Ethernet address */
+static u32 eth_hash(const unsigned char *addr)
+{
+	u64 value = get_unaligned((u64 *)addr);
+
+	/* only want 6 bytes */
+#ifdef __BIG_ENDIAN
+	value <<= 16;
+#else
+	value >>= 16;
+#endif
+	return hash_64(value, FDB_HASH_BITS);
+}
+
+/* Hash chain to use given mac address */
+static inline struct hlist_head *vxlan_fdb_head(struct vxlan_dev *vxlan,
+						const u8 *mac)
+{
+	return &vxlan->fdb_head[eth_hash(mac)];
+}
+
+/* Look up Ethernet address in forwarding table */
+static struct vxlan_fdb *vxlan_find_mac(struct vxlan_dev *vxlan,
+					const u8 *mac)
+
+{
+	struct hlist_head *head = vxlan_fdb_head(vxlan, mac);
+	struct vxlan_fdb *f;
+	struct hlist_node *node;
+
+	hlist_for_each_entry_rcu(f, node, head, hlist) {
+		if (compare_ether_addr(mac, f->eth_addr) == 0)
+			return f;
+	}
+
+	return NULL;
+}
+
+/* Add new entry to forwarding table -- assumes lock held */
+static int vxlan_fdb_create(struct vxlan_dev *vxlan,
+			    const u8 *mac, __be32 ip,
+			    __u16 state, __u16 flags)
+{
+	struct vxlan_fdb *f;
+	int notify = 0;
+
+	f = vxlan_find_mac(vxlan, mac);
+	if (f) {
+		if (flags & NLM_F_EXCL) {
+			netdev_dbg(vxlan->dev,
+				   "lost race to create %pM\n", mac);
+			return -EEXIST;
+		}
+		if (f->state != state) {
+			f->state = state;
+			f->updated = jiffies;
+			notify = 1;
+		}
+	} else {
+		if (!(flags & NLM_F_CREATE))
+			return -ENOENT;
+
+		if (vxlan->addrmax && vxlan->addrcnt >= vxlan->addrmax)
+			return -ENOSPC;
+
+		netdev_dbg(vxlan->dev, "add %pM -> %pI4\n", mac, &ip);
+		f = kmalloc(sizeof(*f), GFP_ATOMIC);
+		if (!f)
+			return -ENOMEM;
+
+		notify = 1;
+		f->remote_ip = ip;
+		f->state = state;
+		f->updated = f->used = jiffies;
+		memcpy(f->eth_addr, mac, ETH_ALEN);
+
+		++vxlan->addrcnt;
+		hlist_add_head_rcu(&f->hlist,
+				   vxlan_fdb_head(vxlan, mac));
+	}
+
+	if (notify)
+		vxlan_fdb_notify(vxlan, f, RTM_NEWNEIGH);
+
+	return 0;
+}
+
+static void vxlan_fdb_destroy(struct vxlan_dev *vxlan, struct vxlan_fdb *f)
+{
+	netdev_dbg(vxlan->dev,
+		    "delete %pM\n", f->eth_addr);
+
+	--vxlan->addrcnt;
+	vxlan_fdb_notify(vxlan, f, RTM_DELNEIGH);
+
+	hlist_del_rcu(&f->hlist);
+	kfree_rcu(f, rcu);
+}
+
+/* Add static entry (via netlink) */
+static int vxlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
+			 struct net_device *dev,
+			 const unsigned char *addr, u16 flags)
+{
+	struct vxlan_dev *vxlan = netdev_priv(dev);
+	__be32 ip;
+	int err;
+
+	if (!(ndm->ndm_state & (NUD_PERMANENT|NUD_REACHABLE))) {
+		pr_info("RTM_NEWNEIGH with invalid state %#x\n",
+			ndm->ndm_state);
+		return -EINVAL;
+	}
+
+	if (tb[NDA_DST] == NULL)
+		return -EINVAL;
+
+	if (nla_len(tb[NDA_DST]) != sizeof(__be32))
+		return -EAFNOSUPPORT;
+
+	ip = nla_get_be32(tb[NDA_DST]);
+
+	spin_lock_bh(&vxlan->hash_lock);
+	err = vxlan_fdb_create(vxlan, addr, ip, ndm->ndm_state, flags);
+	spin_unlock_bh(&vxlan->hash_lock);
+
+	return err;
+}
+
+/* Delete entry (via netlink) */
+static int vxlan_fdb_delete(struct ndmsg *ndm, struct net_device *dev,
+			    const unsigned char *addr)
+{
+	struct vxlan_dev *vxlan = netdev_priv(dev);
+	struct vxlan_fdb *f;
+	int err = -ENOENT;
+
+	spin_lock_bh(&vxlan->hash_lock);
+	f = vxlan_find_mac(vxlan, addr);
+	if (f) {
+		vxlan_fdb_destroy(vxlan, f);
+		err = 0;
+	}
+	spin_unlock_bh(&vxlan->hash_lock);
+
+	return err;
+}
+
+/* Dump forwarding table */
+static int vxlan_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb,
+			  struct net_device *dev, int idx)
+{
+	struct vxlan_dev *vxlan = netdev_priv(dev);
+	unsigned int h;
+
+	for (h = 0; h < FDB_HASH_SIZE; ++h) {
+		struct vxlan_fdb *f;
+		struct hlist_node *n;
+		int err;
+
+		hlist_for_each_entry_rcu(f, n, &vxlan->fdb_head[h], hlist) {
+			if (idx < cb->args[0])
+				goto skip;
+
+			err = vxlan_fdb_info(skb, vxlan, f,
+					     NETLINK_CB(cb->skb).portid,
+					     cb->nlh->nlmsg_seq,
+					     RTM_NEWNEIGH,
+					     NLM_F_MULTI);
+			if (err < 0)
+				break;
+skip:
+			++idx;
+		}
+	}
+
+	return idx;
+}
+
+/* Watch incoming packets to learn mapping between Ethernet address
+ * and Tunnel endpoint.
+ */
+static void vxlan_snoop(struct net_device *dev,
+			__be32 src_ip, const u8 *src_mac)
+{
+	struct vxlan_dev *vxlan = netdev_priv(dev);
+	struct vxlan_fdb *f;
+	int err;
+
+	f = vxlan_find_mac(vxlan, src_mac);
+	if (likely(f)) {
+		f->used = jiffies;
+		if (likely(f->remote_ip == src_ip))
+			return;
+
+		if (net_ratelimit())
+			netdev_info(dev,
+				    "%pM migrated from %pI4 to %pI4\n",
+				    src_mac, &f->remote_ip, &src_ip);
+
+		f->remote_ip = src_ip;
+		f->updated = jiffies;
+	} else {
+		/* learned new entry */
+		spin_lock(&vxlan->hash_lock);
+		err = vxlan_fdb_create(vxlan, src_mac, src_ip,
+				       NUD_REACHABLE,
+				       NLM_F_EXCL|NLM_F_CREATE);
+		spin_unlock(&vxlan->hash_lock);
+	}
+}
+
+
+/* See if multicast group is already in use by other ID */
+static bool vxlan_group_used(struct vxlan_net *vn,
+			     const struct vxlan_dev *this)
+{
+	const struct vxlan_dev *vxlan;
+	struct hlist_node *node;
+	unsigned h;
+
+	for (h = 0; h < VNI_HASH_SIZE; ++h)
+		hlist_for_each_entry(vxlan, node, &vn->vni_list[h], hlist) {
+			if (vxlan == this)
+				continue;
+
+			if (!netif_running(vxlan->dev))
+				continue;
+
+			if (vxlan->gaddr == this->gaddr)
+				return true;
+		}
+
+	return false;
+}
+
+/* kernel equivalent to IP_ADD_MEMBERSHIP */
+static int vxlan_join_group(struct net_device *dev)
+{
+	struct vxlan_dev *vxlan = netdev_priv(dev);
+	struct vxlan_net *vn = net_generic(dev_net(dev), vxlan_net_id);
+	struct sock *sk = vn->sock->sk;
+	struct ip_mreqn mreq = {
+		.imr_multiaddr.s_addr = vxlan->gaddr,
+	};
+	int err;
+
+	/* Already a member of group */
+	if (vxlan_group_used(vn, vxlan))
+		return 0;
+
+	/* Need to drop RTNL to call multicast join */
+	rtnl_unlock();
+	lock_sock(sk);
+	err = ip_mc_join_group(sk, &mreq);
+	release_sock(sk);
+	rtnl_lock();
+
+	return err;
+}
+
+
+/* kernel equivalent to IP_DROP_MEMBERSHIP */
+static int vxlan_leave_group(struct net_device *dev)
+{
+	struct vxlan_dev *vxlan = netdev_priv(dev);
+	struct vxlan_net *vn = net_generic(dev_net(dev), vxlan_net_id);
+	int err = 0;
+	struct sock *sk = vn->sock->sk;
+	struct ip_mreqn mreq = {
+		.imr_multiaddr.s_addr = vxlan->gaddr,
+	};
+
+	/* Only leave group when last vxlan is done. */
+	if (vxlan_group_used(vn, vxlan))
+		return 0;
+
+	/* Need to drop RTNL to call multicast leave */
+	rtnl_unlock();
+	lock_sock(sk);
+	err = ip_mc_leave_group(sk, &mreq);
+	release_sock(sk);
+	rtnl_lock();
+
+	return err;
+}
+
+/* Callback from net/ipv4/udp.c to receive packets */
+static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
+{
+	struct iphdr *oip;
+	struct vxlanhdr *vxh;
+	struct vxlan_dev *vxlan;
+	struct vxlan_stats *stats;
+	__u32 vni;
+	int err;
+
+	/* pop off outer UDP header */
+	__skb_pull(skb, sizeof(struct udphdr));
+
+	/* Need Vxlan and inner Ethernet header to be present */
+	if (!pskb_may_pull(skb, sizeof(struct vxlanhdr)))
+		goto error;
+
+	/* Drop packets with reserved bits set */
+	vxh = (struct vxlanhdr *) skb->data;
+	if (vxh->vx_flags != htonl(VXLAN_FLAGS) ||
+	    (vxh->vx_vni & htonl(0xff))) {
+		netdev_dbg(skb->dev, "invalid vxlan flags=%#x vni=%#x\n",
+			   ntohl(vxh->vx_flags), ntohl(vxh->vx_vni));
+		goto error;
+	}
+
+	__skb_pull(skb, sizeof(struct vxlanhdr));
+	skb_postpull_rcsum(skb, eth_hdr(skb), sizeof(struct vxlanhdr));
+
+	/* Is this VNI defined? */
+	vni = ntohl(vxh->vx_vni) >> 8;
+	vxlan = vxlan_find_vni(sock_net(sk), vni);
+	if (!vxlan) {
+		netdev_dbg(skb->dev, "unknown vni %d\n", vni);
+		goto drop;
+	}
+
+	if (!pskb_may_pull(skb, ETH_HLEN)) {
+		vxlan->dev->stats.rx_length_errors++;
+		vxlan->dev->stats.rx_errors++;
+		goto drop;
+	}
+
+	/* Re-examine inner Ethernet packet */
+	oip = ip_hdr(skb);
+	skb->protocol = eth_type_trans(skb, vxlan->dev);
+	skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
+
+	/* Ignore packet loops (and multicast echo) */
+	if (compare_ether_addr(eth_hdr(skb)->h_source,
+			       vxlan->dev->dev_addr) == 0)
+		goto drop;
+
+	if (vxlan->learn)
+		vxlan_snoop(skb->dev, oip->saddr, eth_hdr(skb)->h_source);
+
+	__skb_tunnel_rx(skb, vxlan->dev);
+	skb_reset_network_header(skb);
+
+	err = IP_ECN_decapsulate(oip, skb);
+	if (unlikely(err)) {
+		if (log_ecn_error)
+			net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
+					     &oip->saddr, oip->tos);
+		if (err > 1) {
+			++vxlan->dev->stats.rx_frame_errors;
+			++vxlan->dev->stats.rx_errors;
+			goto drop;
+		}
+	}
+
+	stats = this_cpu_ptr(vxlan->stats);
+	u64_stats_update_begin(&stats->syncp);
+	stats->rx_packets++;
+	stats->rx_bytes += skb->len;
+	u64_stats_update_end(&stats->syncp);
+
+	netif_rx(skb);
+
+	return 0;
+error:
+	/* Put UDP header back */
+	__skb_push(skb, sizeof(struct udphdr));
+
+	return 1;
+drop:
+	/* Consume bad packet */
+	kfree_skb(skb);
+	return 0;
+}
+
+/* Extract dsfield from inner protocol */
+static inline u8 vxlan_get_dsfield(const struct iphdr *iph,
+				   const struct sk_buff *skb)
+{
+	if (skb->protocol == htons(ETH_P_IP))
+		return iph->tos;
+	else if (skb->protocol == htons(ETH_P_IPV6))
+		return ipv6_get_dsfield((const struct ipv6hdr *)iph);
+	else
+		return 0;
+}
+
+/* Propogate ECN bits out */
+static inline u8 vxlan_ecn_encap(u8 tos,
+				 const struct iphdr *iph,
+				 const struct sk_buff *skb)
+{
+	u8 inner = vxlan_get_dsfield(iph, skb);
+
+	return INET_ECN_encapsulate(tos, inner);
+}
+
+/* Transmit local packets over Vxlan
+ *
+ * Outer IP header inherits ECN and DF from inner header.
+ * Outer UDP destination is the VXLAN assigned port.
+ *           source port is based on hash of flow if available
+ *                       otherwise use a random value
+ */
+static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+	struct vxlan_dev *vxlan = netdev_priv(dev);
+	struct rtable *rt;
+	const struct ethhdr *eth;
+	const struct iphdr *old_iph;
+	struct iphdr *iph;
+	struct vxlanhdr *vxh;
+	struct udphdr *uh;
+	struct flowi4 fl4;
+	struct vxlan_fdb *f;
+	unsigned int pkt_len = skb->len;
+	u32 hash;
+	__be32 dst;
+	__be16 df = 0;
+	__u8 tos, ttl;
+	int err;
+
+	/* Need space for new headers (invalidates iph ptr) */
+	if (skb_cow_head(skb, VXLAN_HEADROOM))
+		goto drop;
+
+	eth = (void *)skb->data;
+	old_iph = ip_hdr(skb);
+
+	if (!is_multicast_ether_addr(eth->h_dest) &&
+	    (f = vxlan_find_mac(vxlan, eth->h_dest)))
+		dst = f->remote_ip;
+	else if (vxlan->gaddr) {
+		dst = vxlan->gaddr;
+	} else
+		goto drop;
+
+	ttl = vxlan->ttl;
+	if (!ttl && IN_MULTICAST(ntohl(dst)))
+		ttl = 1;
+
+	tos = vxlan->tos;
+	if (tos == 1)
+		tos = vxlan_get_dsfield(old_iph, skb);
+
+	hash = skb_get_rxhash(skb);
+
+	rt = ip_route_output_gre(dev_net(dev), &fl4, dst,
+				 vxlan->saddr, vxlan->vni,
+				 RT_TOS(tos), vxlan->link);
+	if (IS_ERR(rt)) {
+		netdev_dbg(dev, "no route to %pI4\n", &dst);
+		dev->stats.tx_carrier_errors++;
+		goto tx_error;
+	}
+
+	if (rt->dst.dev == dev) {
+		netdev_dbg(dev, "circular route to %pI4\n", &dst);
+		ip_rt_put(rt);
+		dev->stats.collisions++;
+		goto tx_error;
+	}
+
+	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
+	IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
+			      IPSKB_REROUTED);
+	skb_dst_drop(skb);
+	skb_dst_set(skb, &rt->dst);
+
+	vxh = (struct vxlanhdr *) __skb_push(skb, sizeof(*vxh));
+	vxh->vx_flags = htonl(VXLAN_FLAGS);
+	vxh->vx_vni = htonl(vxlan->vni << 8);
+
+	__skb_push(skb, sizeof(*uh));
+	skb_reset_transport_header(skb);
+	uh = udp_hdr(skb);
+
+	uh->dest = htons(vxlan_port);
+	uh->source = hash ? :random32();
+
+	uh->len = htons(skb->len);
+	uh->check = 0;
+
+	__skb_push(skb, sizeof(*iph));
+	skb_reset_network_header(skb);
+	iph		= ip_hdr(skb);
+	iph->version	= 4;
+	iph->ihl	= sizeof(struct iphdr) >> 2;
+	iph->frag_off	= df;
+	iph->protocol	= IPPROTO_UDP;
+	iph->tos	= vxlan_ecn_encap(tos, old_iph, skb);
+	iph->daddr	= fl4.daddr;
+	iph->saddr	= fl4.saddr;
+	iph->ttl	= ttl ? : ip4_dst_hoplimit(&rt->dst);
+
+	/* See __IPTUNNEL_XMIT */
+	skb->ip_summed = CHECKSUM_NONE;
+	ip_select_ident(iph, &rt->dst, NULL);
+
+	err = ip_local_out(skb);
+	if (likely(net_xmit_eval(err) == 0)) {
+		struct vxlan_stats *stats = this_cpu_ptr(vxlan->stats);
+
+		u64_stats_update_begin(&stats->syncp);
+		stats->tx_packets++;
+		stats->tx_bytes += pkt_len;
+		u64_stats_update_end(&stats->syncp);
+	} else {
+		dev->stats.tx_errors++;
+		dev->stats.tx_aborted_errors++;
+	}
+	return NETDEV_TX_OK;
+
+drop:
+	dev->stats.tx_dropped++;
+	goto tx_free;
+
+tx_error:
+	dev->stats.tx_errors++;
+tx_free:
+	dev_kfree_skb(skb);
+	return NETDEV_TX_OK;
+}
+
+/* Walk the forwarding table and purge stale entries */
+static void vxlan_cleanup(unsigned long arg)
+{
+	struct vxlan_dev *vxlan = (struct vxlan_dev *) arg;
+	unsigned long next_timer = jiffies + FDB_AGE_INTERVAL;
+	unsigned int h;
+
+	if (!netif_running(vxlan->dev))
+		return;
+
+	spin_lock_bh(&vxlan->hash_lock);
+	for (h = 0; h < FDB_HASH_SIZE; ++h) {
+		struct hlist_node *p, *n;
+		hlist_for_each_safe(p, n, &vxlan->fdb_head[h]) {
+			struct vxlan_fdb *f
+				= container_of(p, struct vxlan_fdb, hlist);
+			unsigned long timeout;
+
+			if (f->state == NUD_PERMANENT)
+				continue;
+
+			timeout = f->used + vxlan->age_interval * HZ;
+			if (time_before_eq(timeout, jiffies)) {
+				netdev_dbg(vxlan->dev,
+					   "garbage collect %pM\n",
+					   f->eth_addr);
+				f->state = NUD_STALE;
+				vxlan_fdb_destroy(vxlan, f);
+			} else if (time_before(timeout, next_timer))
+				next_timer = timeout;
+		}
+	}
+	spin_unlock_bh(&vxlan->hash_lock);
+
+	mod_timer(&vxlan->age_timer, next_timer);
+}
+
+/* Setup stats when device is created */
+static int vxlan_init(struct net_device *dev)
+{
+	struct vxlan_dev *vxlan = netdev_priv(dev);
+
+	vxlan->stats = alloc_percpu(struct vxlan_stats);
+	if (!vxlan->stats)
+		return -ENOMEM;
+
+	return 0;
+}
+
+/* Start ageing timer and join group when device is brought up */
+static int vxlan_open(struct net_device *dev)
+{
+	struct vxlan_dev *vxlan = netdev_priv(dev);
+	int err;
+
+	if (vxlan->gaddr) {
+		err = vxlan_join_group(dev);
+		if (err)
+			return err;
+	}
+
+	if (vxlan->age_interval)
+		mod_timer(&vxlan->age_timer, jiffies + FDB_AGE_INTERVAL);
+
+	return 0;
+}
+
+/* Purge the forwarding table */
+static void vxlan_flush(struct vxlan_dev *vxlan)
+{
+	unsigned h;
+
+	spin_lock_bh(&vxlan->hash_lock);
+	for (h = 0; h < FDB_HASH_SIZE; ++h) {
+		struct hlist_node *p, *n;
+		hlist_for_each_safe(p, n, &vxlan->fdb_head[h]) {
+			struct vxlan_fdb *f
+				= container_of(p, struct vxlan_fdb, hlist);
+			vxlan_fdb_destroy(vxlan, f);
+		}
+	}
+	spin_unlock_bh(&vxlan->hash_lock);
+}
+
+/* Cleanup timer and forwarding table on shutdown */
+static int vxlan_stop(struct net_device *dev)
+{
+	struct vxlan_dev *vxlan = netdev_priv(dev);
+
+	if (vxlan->gaddr)
+		vxlan_leave_group(dev);
+
+	del_timer_sync(&vxlan->age_timer);
+
+	vxlan_flush(vxlan);
+
+	return 0;
+}
+
+/* Merge per-cpu statistics */
+static struct rtnl_link_stats64 *vxlan_stats64(struct net_device *dev,
+					       struct rtnl_link_stats64 *stats)
+{
+	struct vxlan_dev *vxlan = netdev_priv(dev);
+	struct vxlan_stats tmp, sum = { 0 };
+	unsigned int cpu;
+
+	for_each_possible_cpu(cpu) {
+		unsigned int start;
+		const struct vxlan_stats *stats
+			= per_cpu_ptr(vxlan->stats, cpu);
+
+		do {
+			start = u64_stats_fetch_begin_bh(&stats->syncp);
+			memcpy(&tmp, stats, sizeof(tmp));
+		} while (u64_stats_fetch_retry_bh(&stats->syncp, start));
+
+		sum.tx_bytes   += tmp.tx_bytes;
+		sum.tx_packets += tmp.tx_packets;
+		sum.rx_bytes   += tmp.rx_bytes;
+		sum.rx_packets += tmp.rx_packets;
+	}
+
+	stats->tx_bytes   = sum.tx_bytes;
+	stats->tx_packets = sum.tx_packets;
+	stats->rx_bytes   = sum.rx_bytes;
+	stats->rx_packets = sum.rx_packets;
+
+	stats->multicast = dev->stats.multicast;
+	stats->rx_length_errors = dev->stats.rx_length_errors;
+	stats->rx_frame_errors = dev->stats.rx_frame_errors;
+	stats->rx_errors = dev->stats.rx_errors;
+
+	stats->tx_dropped = dev->stats.tx_dropped;
+	stats->tx_carrier_errors  = dev->stats.tx_carrier_errors;
+	stats->tx_aborted_errors  = dev->stats.tx_aborted_errors;
+	stats->collisions  = dev->stats.collisions;
+	stats->tx_errors = dev->stats.tx_errors;
+
+	return stats;
+}
+
+/* Stub, nothing needs to be done. */
+static void vxlan_set_multicast_list(struct net_device *dev)
+{
+}
+
+static const struct net_device_ops vxlan_netdev_ops = {
+	.ndo_init		= vxlan_init,
+	.ndo_open		= vxlan_open,
+	.ndo_stop		= vxlan_stop,
+	.ndo_start_xmit		= vxlan_xmit,
+	.ndo_get_stats64	= vxlan_stats64,
+	.ndo_set_rx_mode	= vxlan_set_multicast_list,
+	.ndo_change_mtu		= eth_change_mtu,
+	.ndo_validate_addr	= eth_validate_addr,
+	.ndo_set_mac_address	= eth_mac_addr,
+	.ndo_fdb_add		= vxlan_fdb_add,
+	.ndo_fdb_del		= vxlan_fdb_delete,
+	.ndo_fdb_dump		= vxlan_fdb_dump,
+};
+
+/* Info for udev, that this is a virtual tunnel endpoint */
+static struct device_type vxlan_type = {
+	.name = "vxlan",
+};
+
+static void vxlan_free(struct net_device *dev)
+{
+	struct vxlan_dev *vxlan = netdev_priv(dev);
+
+	free_percpu(vxlan->stats);
+	free_netdev(dev);
+}
+
+/* Initialize the device structure. */
+static void vxlan_setup(struct net_device *dev)
+{
+	struct vxlan_dev *vxlan = netdev_priv(dev);
+	unsigned h;
+
+	eth_hw_addr_random(dev);
+	ether_setup(dev);
+
+	dev->netdev_ops = &vxlan_netdev_ops;
+	dev->destructor = vxlan_free;
+	SET_NETDEV_DEVTYPE(dev, &vxlan_type);
+
+	dev->tx_queue_len = 0;
+	dev->features	|= NETIF_F_LLTX;
+	dev->features	|= NETIF_F_NETNS_LOCAL;
+	dev->priv_flags	&= ~IFF_XMIT_DST_RELEASE;
+
+	spin_lock_init(&vxlan->hash_lock);
+
+	init_timer_deferrable(&vxlan->age_timer);
+	vxlan->age_timer.function = vxlan_cleanup;
+	vxlan->age_timer.data = (unsigned long) vxlan;
+
+	vxlan->dev = dev;
+
+	for (h = 0; h < FDB_HASH_SIZE; ++h)
+		INIT_HLIST_HEAD(&vxlan->fdb_head[h]);
+}
+
+static const struct nla_policy vxlan_policy[IFLA_VXLAN_MAX + 1] = {
+	[IFLA_VXLAN_ID]		= { .type = NLA_U32 },
+	[IFLA_VXLAN_GROUP]	= { .len = FIELD_SIZEOF(struct iphdr, daddr) },
+	[IFLA_VXLAN_LINK]	= { .type = NLA_U32 },
+	[IFLA_VXLAN_LOCAL]	= { .len = FIELD_SIZEOF(struct iphdr, saddr) },
+	[IFLA_VXLAN_TOS]	= { .type = NLA_U8 },
+	[IFLA_VXLAN_TTL]	= { .type = NLA_U8 },
+	[IFLA_VXLAN_LEARNING]	= { .type = NLA_U8 },
+	[IFLA_VXLAN_AGEING]	= { .type = NLA_U32 },
+	[IFLA_VXLAN_LIMIT]	= { .type = NLA_U32 },
+};
+
+static int vxlan_validate(struct nlattr *tb[], struct nlattr *data[])
+{
+	if (tb[IFLA_ADDRESS]) {
+		if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) {
+			pr_debug("invalid link address (not ethernet)\n");
+			return -EINVAL;
+		}
+
+		if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS]))) {
+			pr_debug("invalid all zero ethernet address\n");
+			return -EADDRNOTAVAIL;
+		}
+	}
+
+	if (!data)
+		return -EINVAL;
+
+	if (data[IFLA_VXLAN_ID]) {
+		__u32 id = nla_get_u32(data[IFLA_VXLAN_ID]);
+		if (id >= VXLAN_VID_MASK)
+			return -ERANGE;
+	}
+
+	if (data[IFLA_VXLAN_GROUP]) {
+		__be32 gaddr = nla_get_be32(data[IFLA_VXLAN_GROUP]);
+		if (!IN_MULTICAST(ntohl(gaddr))) {
+			pr_debug("group address is not IPv4 multicast\n");
+			return -EADDRNOTAVAIL;
+		}
+	}
+	return 0;
+}
+
+static int vxlan_newlink(struct net *net, struct net_device *dev,
+			 struct nlattr *tb[], struct nlattr *data[])
+{
+	struct vxlan_dev *vxlan = netdev_priv(dev);
+	__u32 vni;
+	int err;
+
+	if (!data[IFLA_VXLAN_ID])
+		return -EINVAL;
+
+	vni = nla_get_u32(data[IFLA_VXLAN_ID]);
+	if (vxlan_find_vni(net, vni)) {
+		pr_info("duplicate VNI %u\n", vni);
+		return -EEXIST;
+	}
+	vxlan->vni = vni;
+
+	if (data[IFLA_VXLAN_GROUP])
+		vxlan->gaddr = nla_get_be32(data[IFLA_VXLAN_GROUP]);
+
+	if (data[IFLA_VXLAN_LOCAL])
+		vxlan->saddr = nla_get_be32(data[IFLA_VXLAN_LOCAL]);
+
+	if (data[IFLA_VXLAN_LINK]) {
+		vxlan->link = nla_get_u32(data[IFLA_VXLAN_LINK]);
+
+		if (!tb[IFLA_MTU]) {
+			struct net_device *lowerdev;
+			lowerdev = __dev_get_by_index(net, vxlan->link);
+			dev->mtu = lowerdev->mtu - VXLAN_HEADROOM;
+		}
+	}
+
+	if (data[IFLA_VXLAN_TOS])
+		vxlan->tos  = nla_get_u8(data[IFLA_VXLAN_TOS]);
+
+	if (!data[IFLA_VXLAN_LEARNING] || nla_get_u8(data[IFLA_VXLAN_LEARNING]))
+		vxlan->learn = true;
+
+	if (data[IFLA_VXLAN_AGEING])
+		vxlan->age_interval = nla_get_u32(data[IFLA_VXLAN_AGEING]);
+	else
+		vxlan->age_interval = FDB_AGE_DEFAULT;
+
+	if (data[IFLA_VXLAN_LIMIT])
+		vxlan->addrmax = nla_get_u32(data[IFLA_VXLAN_LIMIT]);
+
+	err = register_netdevice(dev);
+	if (!err)
+		hlist_add_head_rcu(&vxlan->hlist, vni_head(net, vxlan->vni));
+
+	return err;
+}
+
+static void vxlan_dellink(struct net_device *dev, struct list_head *head)
+{
+	struct vxlan_dev *vxlan = netdev_priv(dev);
+
+	hlist_del_rcu(&vxlan->hlist);
+
+	unregister_netdevice_queue(dev, head);
+}
+
+static size_t vxlan_get_size(const struct net_device *dev)
+{
+
+	return nla_total_size(sizeof(__u32)) +	/* IFLA_VXLAN_ID */
+		nla_total_size(sizeof(__be32)) +/* IFLA_VXLAN_GROUP */
+		nla_total_size(sizeof(__u32)) +	/* IFLA_VXLAN_LINK */
+		nla_total_size(sizeof(__be32))+	/* IFLA_VXLAN_LOCAL */
+		nla_total_size(sizeof(__u8)) +	/* IFLA_VXLAN_TTL */
+		nla_total_size(sizeof(__u8)) +	/* IFLA_VXLAN_TOS */
+		nla_total_size(sizeof(__u8)) +	/* IFLA_VXLAN_LEARNING */
+		nla_total_size(sizeof(__u32)) +	/* IFLA_VXLAN_AGEING */
+		nla_total_size(sizeof(__u32)) +	/* IFLA_VXLAN_LIMIT */
+		0;
+}
+
+static int vxlan_fill_info(struct sk_buff *skb, const struct net_device *dev)
+{
+	const struct vxlan_dev *vxlan = netdev_priv(dev);
+
+	if (nla_put_u32(skb, IFLA_VXLAN_ID, vxlan->vni))
+		goto nla_put_failure;
+
+	if (vxlan->gaddr && nla_put_u32(skb, IFLA_VXLAN_GROUP, vxlan->gaddr))
+		goto nla_put_failure;
+
+	if (vxlan->link && nla_put_u32(skb, IFLA_VXLAN_LINK, vxlan->link))
+		goto nla_put_failure;
+
+	if (vxlan->saddr && nla_put_u32(skb, IFLA_VXLAN_LOCAL, vxlan->saddr))
+		goto nla_put_failure;
+
+	if (nla_put_u8(skb, IFLA_VXLAN_TTL, vxlan->ttl) ||
+	    nla_put_u8(skb, IFLA_VXLAN_TOS, vxlan->tos) ||
+	    nla_put_u8(skb, IFLA_VXLAN_LEARNING, vxlan->learn) ||
+	    nla_put_u32(skb, IFLA_VXLAN_AGEING, vxlan->age_interval) ||
+	    nla_put_u32(skb, IFLA_VXLAN_LIMIT, vxlan->addrmax))
+		goto nla_put_failure;
+
+	return 0;
+
+nla_put_failure:
+	return -EMSGSIZE;
+}
+
+static struct rtnl_link_ops vxlan_link_ops __read_mostly = {
+	.kind		= "vxlan",
+	.maxtype	= IFLA_VXLAN_MAX,
+	.policy		= vxlan_policy,
+	.priv_size	= sizeof(struct vxlan_dev),
+	.setup		= vxlan_setup,
+	.validate	= vxlan_validate,
+	.newlink	= vxlan_newlink,
+	.dellink	= vxlan_dellink,
+	.get_size	= vxlan_get_size,
+	.fill_info	= vxlan_fill_info,
+};
+
+static __net_init int vxlan_init_net(struct net *net)
+{
+	struct vxlan_net *vn = net_generic(net, vxlan_net_id);
+	struct sock *sk;
+	struct sockaddr_in vxlan_addr = {
+		.sin_family = AF_INET,
+		.sin_addr.s_addr = htonl(INADDR_ANY),
+	};
+	int rc;
+	unsigned h;
+
+	/* Create UDP socket for encapsulation receive. */
+	rc = sock_create_kern(AF_INET, SOCK_DGRAM, IPPROTO_UDP, &vn->sock);
+	if (rc < 0) {
+		pr_debug("UDP socket create failed\n");
+		return rc;
+	}
+
+	vxlan_addr.sin_port = htons(vxlan_port);
+
+	rc = kernel_bind(vn->sock, (struct sockaddr *) &vxlan_addr,
+			 sizeof(vxlan_addr));
+	if (rc < 0) {
+		pr_debug("bind for UDP socket %pI4:%u (%d)\n",
+			 &vxlan_addr.sin_addr, ntohs(vxlan_addr.sin_port), rc);
+		sock_release(vn->sock);
+		vn->sock = NULL;
+		return rc;
+	}
+
+	/* Disable multicast loopback */
+	sk = vn->sock->sk;
+	inet_sk(sk)->mc_loop = 0;
+
+	/* Mark socket as an encapsulation socket. */
+	udp_sk(sk)->encap_type = 1;
+	udp_sk(sk)->encap_rcv = vxlan_udp_encap_recv;
+	udp_encap_enable();
+
+	for (h = 0; h < VNI_HASH_SIZE; ++h)
+		INIT_HLIST_HEAD(&vn->vni_list[h]);
+
+	return 0;
+}
+
+static __net_exit void vxlan_exit_net(struct net *net)
+{
+	struct vxlan_net *vn = net_generic(net, vxlan_net_id);
+
+	if (vn->sock) {
+		sock_release(vn->sock);
+		vn->sock = NULL;
+	}
+}
+
+static struct pernet_operations vxlan_net_ops = {
+	.init = vxlan_init_net,
+	.exit = vxlan_exit_net,
+	.id   = &vxlan_net_id,
+	.size = sizeof(struct vxlan_net),
+};
+
+static int __init vxlan_init_module(void)
+{
+	int rc;
+
+	get_random_bytes(&vxlan_salt, sizeof(vxlan_salt));
+
+	rc = register_pernet_device(&vxlan_net_ops);
+	if (rc)
+		goto out1;
+
+	rc = rtnl_link_register(&vxlan_link_ops);
+	if (rc)
+		goto out2;
+
+	return 0;
+
+out2:
+	unregister_pernet_device(&vxlan_net_ops);
+out1:
+	return rc;
+}
+module_init(vxlan_init_module);
+
+static void __exit vxlan_cleanup_module(void)
+{
+	rtnl_link_unregister(&vxlan_link_ops);
+	unregister_pernet_device(&vxlan_net_ops);
+}
+module_exit(vxlan_cleanup_module);
+
+MODULE_LICENSE("GPL");
+MODULE_VERSION(VXLAN_VERSION);
+MODULE_AUTHOR("Stephen Hemminger <shemminger@vyatta.com>");
+MODULE_ALIAS_RTNL_LINK("vxlan");
diff --git a/include/linux/if_link.h b/include/linux/if_link.h
index 4491177e984d..e4dad4ddf085 100644
--- a/include/linux/if_link.h
+++ b/include/linux/if_link.h
@@ -272,6 +272,22 @@ enum macvlan_mode {
 
 #define MACVLAN_FLAG_NOPROMISC	1
 
+/* VXLAN section */
+enum {
+	IFLA_VXLAN_UNSPEC,
+	IFLA_VXLAN_ID,
+	IFLA_VXLAN_GROUP,
+	IFLA_VXLAN_LINK,
+	IFLA_VXLAN_LOCAL,
+	IFLA_VXLAN_TTL,
+	IFLA_VXLAN_TOS,
+	IFLA_VXLAN_LEARNING,
+	IFLA_VXLAN_AGEING,
+	IFLA_VXLAN_LIMIT,
+	__IFLA_VXLAN_MAX
+};
+#define IFLA_VXLAN_MAX	(__IFLA_VXLAN_MAX - 1)
+
 /* SR-IOV virtual function management section */
 
 enum {
-- 
cgit v1.2.3