From f2af74123f8c5a735248547f4286a3adc28633c1 Mon Sep 17 00:00:00 2001
From: Michal Nazarewicz <mina86@mina86.com>
Date: Tue, 20 May 2014 09:38:03 -1000
Subject: tools: ffs-test: convert to new descriptor format fixing compilation
 error
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Commit [ac8dde11: “usb: gadget: f_fs: Add flags to descriptors block”]
which introduced a new descriptor format for FunctionFS removed the
usb_functionfs_descs_head structure, which is still used by ffs-test.
tool.

Convert ffs-test by converting it to use the new header format.  For
testing kernels prior to 3.14 (when the new format was introduced) and
parsing of the legacy headers in the new kernels, provide a compilation
flag to make the tool use the old format.

Finally, include information as to when the legacy FunctionFS headers
format has been deprecated (which is also when the new one has been
introduced).

Reported-by: Lad, Prabhakar <prabhakar.csengg@gmail.com>
Signed-off-by: Michal Nazarewicz <mina86@mina86.com>
Signed-off-by: Felipe Balbi <balbi@ti.com>
---
 include/uapi/linux/usb/functionfs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/usb/functionfs.h b/include/uapi/linux/usb/functionfs.h
index 2a4b4a72a4f9..ecb3a31f7ca6 100644
--- a/include/uapi/linux/usb/functionfs.h
+++ b/include/uapi/linux/usb/functionfs.h
@@ -53,7 +53,7 @@ struct usb_endpoint_descriptor_no_audio {
  * structure.  Any flags that are not recognised cause the whole block to be
  * rejected with -ENOSYS.
  *
- * Legacy descriptors format:
+ * Legacy descriptors format (deprecated as of 3.14):
  *
  * | off | name      | type         | description                          |
  * |-----+-----------+--------------+--------------------------------------|
-- 
cgit v1.2.3


From 31fa97c5defca3895dc6c823096d7ba59df76125 Mon Sep 17 00:00:00 2001
From: Arik Nemtsov <arik@wizery.com>
Date: Wed, 11 Jun 2014 17:18:21 +0300
Subject: cfg80211: pass TDLS initiator in tdls_mgmt operations

The TDLS initiator is set once during link setup. If determines the
address ordering in the link identifier IE.

Fix dependent drivers - mwifiex and mac80211.

Signed-off-by: Arik Nemtsov <arikx.nemtsov@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 drivers/net/wireless/mwifiex/cfg80211.c |  3 ++-
 include/net/cfg80211.h                  |  2 +-
 include/uapi/linux/nl80211.h            |  5 +++++
 net/mac80211/ieee80211_i.h              |  3 ++-
 net/mac80211/tdls.c                     |  3 ++-
 net/wireless/nl80211.c                  |  4 ++++
 net/wireless/rdev-ops.h                 |  6 +++---
 net/wireless/trace.h                    | 10 +++++++---
 8 files changed, 26 insertions(+), 10 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/drivers/net/wireless/mwifiex/cfg80211.c b/drivers/net/wireless/mwifiex/cfg80211.c
index e95dec91a561..149d2e693bdd 100644
--- a/drivers/net/wireless/mwifiex/cfg80211.c
+++ b/drivers/net/wireless/mwifiex/cfg80211.c
@@ -2631,7 +2631,8 @@ static int
 mwifiex_cfg80211_tdls_mgmt(struct wiphy *wiphy, struct net_device *dev,
 			   const u8 *peer, u8 action_code, u8 dialog_token,
 			   u16 status_code, u32 peer_capability,
-			   const u8 *extra_ies, size_t extra_ies_len)
+			   bool initiator, const u8 *extra_ies,
+			   size_t extra_ies_len)
 {
 	struct mwifiex_private *priv = mwifiex_netdev_get_priv(dev);
 	int ret;
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 29cb4b2bee5a..b9eeae3990cf 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -2500,7 +2500,7 @@ struct cfg80211_ops {
 	int	(*tdls_mgmt)(struct wiphy *wiphy, struct net_device *dev,
 			     const u8 *peer, u8 action_code,  u8 dialog_token,
 			     u16 status_code, u32 peer_capability,
-			     const u8 *buf, size_t len);
+			     bool initiator, const u8 *buf, size_t len);
 	int	(*tdls_oper)(struct wiphy *wiphy, struct net_device *dev,
 			     const u8 *peer, enum nl80211_tdls_operation oper);
 
diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index be9519b52bb1..f1db15b9c041 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -1591,6 +1591,9 @@ enum nl80211_commands {
  *	creation then the new interface will be owned by the netlink socket
  *	that created it and will be destroyed when the socket is closed
  *
+ * @NL80211_ATTR_TDLS_INITIATOR: flag attribute indicating the current end is
+ *	the TDLS link initiator.
+ *
  * @NL80211_ATTR_MAX: highest attribute number currently defined
  * @__NL80211_ATTR_AFTER_LAST: internal use
  */
@@ -1931,6 +1934,8 @@ enum nl80211_attrs {
 	NL80211_ATTR_CSA_C_OFFSETS_TX,
 	NL80211_ATTR_MAX_CSA_COUNTERS,
 
+	NL80211_ATTR_TDLS_INITIATOR,
+
 	/* add attributes here, update the policy in nl80211.c */
 
 	__NL80211_ATTR_AFTER_LAST,
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index fc687d2a7518..75f79c168e90 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -1867,7 +1867,8 @@ int ieee80211_max_num_channels(struct ieee80211_local *local);
 int ieee80211_tdls_mgmt(struct wiphy *wiphy, struct net_device *dev,
 			const u8 *peer, u8 action_code, u8 dialog_token,
 			u16 status_code, u32 peer_capability,
-			const u8 *extra_ies, size_t extra_ies_len);
+			bool initiator, const u8 *extra_ies,
+			size_t extra_ies_len);
 int ieee80211_tdls_oper(struct wiphy *wiphy, struct net_device *dev,
 			const u8 *peer, enum nl80211_tdls_operation oper);
 
diff --git a/net/mac80211/tdls.c b/net/mac80211/tdls.c
index cafcbde70018..0b3ca2ce7ea4 100644
--- a/net/mac80211/tdls.c
+++ b/net/mac80211/tdls.c
@@ -299,7 +299,8 @@ fail:
 int ieee80211_tdls_mgmt(struct wiphy *wiphy, struct net_device *dev,
 			const u8 *peer, u8 action_code, u8 dialog_token,
 			u16 status_code, u32 peer_capability,
-			const u8 *extra_ies, size_t extra_ies_len)
+			bool initiator, const u8 *extra_ies,
+			size_t extra_ies_len)
 {
 	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 	struct ieee80211_local *local = sdata->local;
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index ba4f1723c83a..8f46b8ffbcf6 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -337,6 +337,7 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = {
 	[NL80211_ATTR_TDLS_OPERATION] = { .type = NLA_U8 },
 	[NL80211_ATTR_TDLS_SUPPORT] = { .type = NLA_FLAG },
 	[NL80211_ATTR_TDLS_EXTERNAL_SETUP] = { .type = NLA_FLAG },
+	[NL80211_ATTR_TDLS_INITIATOR] = { .type = NLA_FLAG },
 	[NL80211_ATTR_DONT_WAIT_FOR_ACK] = { .type = NLA_FLAG },
 	[NL80211_ATTR_PROBE_RESP] = { .type = NLA_BINARY,
 				      .len = IEEE80211_MAX_DATA_LEN },
@@ -7365,6 +7366,7 @@ static int nl80211_tdls_mgmt(struct sk_buff *skb, struct genl_info *info)
 	u32 peer_capability = 0;
 	u16 status_code;
 	u8 *peer;
+	bool initiator;
 
 	if (!(rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_TDLS) ||
 	    !rdev->ops->tdls_mgmt)
@@ -7381,12 +7383,14 @@ static int nl80211_tdls_mgmt(struct sk_buff *skb, struct genl_info *info)
 	action_code = nla_get_u8(info->attrs[NL80211_ATTR_TDLS_ACTION]);
 	status_code = nla_get_u16(info->attrs[NL80211_ATTR_STATUS_CODE]);
 	dialog_token = nla_get_u8(info->attrs[NL80211_ATTR_TDLS_DIALOG_TOKEN]);
+	initiator = nla_get_flag(info->attrs[NL80211_ATTR_TDLS_INITIATOR]);
 	if (info->attrs[NL80211_ATTR_TDLS_PEER_CAPABILITY])
 		peer_capability =
 			nla_get_u32(info->attrs[NL80211_ATTR_TDLS_PEER_CAPABILITY]);
 
 	return rdev_tdls_mgmt(rdev, dev, peer, action_code,
 			      dialog_token, status_code, peer_capability,
+			      initiator,
 			      nla_data(info->attrs[NL80211_ATTR_IE]),
 			      nla_len(info->attrs[NL80211_ATTR_IE]));
 }
diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h
index f552b0abbd70..56c2240c30ce 100644
--- a/net/wireless/rdev-ops.h
+++ b/net/wireless/rdev-ops.h
@@ -751,15 +751,15 @@ static inline int rdev_tdls_mgmt(struct cfg80211_registered_device *rdev,
 				 struct net_device *dev, u8 *peer,
 				 u8 action_code, u8 dialog_token,
 				 u16 status_code, u32 peer_capability,
-				 const u8 *buf, size_t len)
+				 bool initiator, const u8 *buf, size_t len)
 {
 	int ret;
 	trace_rdev_tdls_mgmt(&rdev->wiphy, dev, peer, action_code,
 			     dialog_token, status_code, peer_capability,
-			     buf, len);
+			     initiator, buf, len);
 	ret = rdev->ops->tdls_mgmt(&rdev->wiphy, dev, peer, action_code,
 				   dialog_token, status_code, peer_capability,
-				   buf, len);
+				   initiator, buf, len);
 	trace_rdev_return_int(&rdev->wiphy, ret);
 	return ret;
 }
diff --git a/net/wireless/trace.h b/net/wireless/trace.h
index 174559aade57..85474ee501eb 100644
--- a/net/wireless/trace.h
+++ b/net/wireless/trace.h
@@ -1454,9 +1454,9 @@ TRACE_EVENT(rdev_tdls_mgmt,
 	TP_PROTO(struct wiphy *wiphy, struct net_device *netdev,
 		 u8 *peer, u8 action_code, u8 dialog_token,
 		 u16 status_code, u32 peer_capability,
-		 const u8 *buf, size_t len),
+		 bool initiator, const u8 *buf, size_t len),
 	TP_ARGS(wiphy, netdev, peer, action_code, dialog_token, status_code,
-		peer_capability, buf, len),
+		peer_capability, initiator, buf, len),
 	TP_STRUCT__entry(
 		WIPHY_ENTRY
 		NETDEV_ENTRY
@@ -1465,6 +1465,7 @@ TRACE_EVENT(rdev_tdls_mgmt,
 		__field(u8, dialog_token)
 		__field(u16, status_code)
 		__field(u32, peer_capability)
+		__field(bool, initiator)
 		__dynamic_array(u8, buf, len)
 	),
 	TP_fast_assign(
@@ -1475,13 +1476,16 @@ TRACE_EVENT(rdev_tdls_mgmt,
 		__entry->dialog_token = dialog_token;
 		__entry->status_code = status_code;
 		__entry->peer_capability = peer_capability;
+		__entry->initiator = initiator;
 		memcpy(__get_dynamic_array(buf), buf, len);
 	),
 	TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", " MAC_PR_FMT ", action_code: %u, "
-		  "dialog_token: %u, status_code: %u, peer_capability: %u buf: %#.2x ",
+		  "dialog_token: %u, status_code: %u, peer_capability: %u "
+		  "initiator: %s buf: %#.2x ",
 		  WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(peer),
 		  __entry->action_code, __entry->dialog_token,
 		  __entry->status_code, __entry->peer_capability,
+		  BOOL_TO_STR(__entry->initiator),
 		  ((u8 *)__get_dynamic_array(buf))[0])
 );
 
-- 
cgit v1.2.3


From 7200135bc1e61f1437dc326ae2ef2f310c50b4eb Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Mon, 16 Jun 2014 13:01:52 +0200
Subject: netfilter: kill ulog targets

This has been marked as deprecated for quite some time and the NFLOG
target replacement has been also available since 2006.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/uapi/linux/netfilter_bridge/Kbuild     |   1 -
 include/uapi/linux/netfilter_bridge/ebt_ulog.h |  38 --
 include/uapi/linux/netfilter_ipv4/Kbuild       |   1 -
 include/uapi/linux/netfilter_ipv4/ipt_ULOG.h   |  49 ---
 net/bridge/netfilter/Kconfig                   |  16 -
 net/bridge/netfilter/ebt_ulog.c                | 393 -------------------
 net/ipv4/netfilter/Kconfig                     |  19 -
 net/ipv4/netfilter/ipt_ULOG.c                  | 498 -------------------------
 8 files changed, 1015 deletions(-)
 delete mode 100644 include/uapi/linux/netfilter_bridge/ebt_ulog.h
 delete mode 100644 include/uapi/linux/netfilter_ipv4/ipt_ULOG.h
 delete mode 100644 net/bridge/netfilter/ebt_ulog.c
 delete mode 100644 net/ipv4/netfilter/ipt_ULOG.c

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/netfilter_bridge/Kbuild b/include/uapi/linux/netfilter_bridge/Kbuild
index 348717c3a22f..0fbad8ef96de 100644
--- a/include/uapi/linux/netfilter_bridge/Kbuild
+++ b/include/uapi/linux/netfilter_bridge/Kbuild
@@ -14,6 +14,5 @@ header-y += ebt_nflog.h
 header-y += ebt_pkttype.h
 header-y += ebt_redirect.h
 header-y += ebt_stp.h
-header-y += ebt_ulog.h
 header-y += ebt_vlan.h
 header-y += ebtables.h
diff --git a/include/uapi/linux/netfilter_bridge/ebt_ulog.h b/include/uapi/linux/netfilter_bridge/ebt_ulog.h
deleted file mode 100644
index 89a6becb5269..000000000000
--- a/include/uapi/linux/netfilter_bridge/ebt_ulog.h
+++ /dev/null
@@ -1,38 +0,0 @@
-#ifndef _EBT_ULOG_H
-#define _EBT_ULOG_H
-
-#include <linux/types.h>
-
-#define EBT_ULOG_DEFAULT_NLGROUP 0
-#define EBT_ULOG_DEFAULT_QTHRESHOLD 1
-#define EBT_ULOG_MAXNLGROUPS 32 /* hardcoded netlink max */
-#define EBT_ULOG_PREFIX_LEN 32
-#define EBT_ULOG_MAX_QLEN 50
-#define EBT_ULOG_WATCHER "ulog"
-#define EBT_ULOG_VERSION 1
-
-struct ebt_ulog_info {
-	__u32 nlgroup;
-	unsigned int cprange;
-	unsigned int qthreshold;
-	char prefix[EBT_ULOG_PREFIX_LEN];
-};
-
-typedef struct ebt_ulog_packet_msg {
-	int version;
-	char indev[IFNAMSIZ];
-	char outdev[IFNAMSIZ];
-	char physindev[IFNAMSIZ];
-	char physoutdev[IFNAMSIZ];
-	char prefix[EBT_ULOG_PREFIX_LEN];
-	struct timeval stamp;
-	unsigned long mark;
-	unsigned int hook;
-	size_t data_len;
-	/* The complete packet, including Ethernet header and perhaps
-	 * the VLAN header is appended */
-	unsigned char data[0] __attribute__
-	                      ((aligned (__alignof__(struct ebt_ulog_info))));
-} ebt_ulog_packet_msg_t;
-
-#endif /* _EBT_ULOG_H */
diff --git a/include/uapi/linux/netfilter_ipv4/Kbuild b/include/uapi/linux/netfilter_ipv4/Kbuild
index fb008437dde1..ecb291df390e 100644
--- a/include/uapi/linux/netfilter_ipv4/Kbuild
+++ b/include/uapi/linux/netfilter_ipv4/Kbuild
@@ -5,7 +5,6 @@ header-y += ipt_ECN.h
 header-y += ipt_LOG.h
 header-y += ipt_REJECT.h
 header-y += ipt_TTL.h
-header-y += ipt_ULOG.h
 header-y += ipt_ah.h
 header-y += ipt_ecn.h
 header-y += ipt_ttl.h
diff --git a/include/uapi/linux/netfilter_ipv4/ipt_ULOG.h b/include/uapi/linux/netfilter_ipv4/ipt_ULOG.h
deleted file mode 100644
index 417aad280bcc..000000000000
--- a/include/uapi/linux/netfilter_ipv4/ipt_ULOG.h
+++ /dev/null
@@ -1,49 +0,0 @@
-/* Header file for IP tables userspace logging, Version 1.8
- *
- * (C) 2000-2002 by Harald Welte <laforge@gnumonks.org>
- * 
- * Distributed under the terms of GNU GPL */
-
-#ifndef _IPT_ULOG_H
-#define _IPT_ULOG_H
-
-#ifndef NETLINK_NFLOG
-#define NETLINK_NFLOG 	5
-#endif
-
-#define ULOG_DEFAULT_NLGROUP	1
-#define ULOG_DEFAULT_QTHRESHOLD	1
-
-#define ULOG_MAC_LEN	80
-#define ULOG_PREFIX_LEN	32
-
-#define ULOG_MAX_QLEN	50
-/* Why 50? Well... there is a limit imposed by the slab cache 131000
- * bytes. So the multipart netlink-message has to be < 131000 bytes.
- * Assuming a standard ethernet-mtu of 1500, we could define this up
- * to 80... but even 50 seems to be big enough. */
-
-/* private data structure for each rule with a ULOG target */
-struct ipt_ulog_info {
-	unsigned int nl_group;
-	size_t copy_range;
-	size_t qthreshold;
-	char prefix[ULOG_PREFIX_LEN];
-};
-
-/* Format of the ULOG packets passed through netlink */
-typedef struct ulog_packet_msg {
-	unsigned long mark;
-	long timestamp_sec;
-	long timestamp_usec;
-	unsigned int hook;
-	char indev_name[IFNAMSIZ];
-	char outdev_name[IFNAMSIZ];
-	size_t data_len;
-	char prefix[ULOG_PREFIX_LEN];
-	unsigned char mac_len;
-	unsigned char mac[ULOG_MAC_LEN];
-	unsigned char payload[0];
-} ulog_packet_msg_t;
-
-#endif /*_IPT_ULOG_H*/
diff --git a/net/bridge/netfilter/Kconfig b/net/bridge/netfilter/Kconfig
index 629dc77874a9..3a76ac7b7141 100644
--- a/net/bridge/netfilter/Kconfig
+++ b/net/bridge/netfilter/Kconfig
@@ -202,22 +202,6 @@ config BRIDGE_EBT_LOG
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
-config BRIDGE_EBT_ULOG
-	tristate "ebt: ulog support (OBSOLETE)"
-	help
-	  This option enables the old bridge-specific "ebt_ulog" implementation
-	  which has been obsoleted by the new "nfnetlink_log" code (see
-	  CONFIG_NETFILTER_NETLINK_LOG).
-
-	  This option adds the ulog watcher, that you can use in any rule
-	  in any ebtables table. The packet is passed to a userspace
-	  logging daemon using netlink multicast sockets. This differs
-	  from the log watcher in the sense that the complete packet is
-	  sent to userspace instead of a descriptive text and that
-	  netlink multicast sockets are used instead of the syslog.
-
-	  To compile it as a module, choose M here.  If unsure, say N.
-
 config BRIDGE_EBT_NFLOG
 	tristate "ebt: nflog support"
 	help
diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c
deleted file mode 100644
index 7c470c371e14..000000000000
--- a/net/bridge/netfilter/ebt_ulog.c
+++ /dev/null
@@ -1,393 +0,0 @@
-/*
- * netfilter module for userspace bridged Ethernet frames logging daemons
- *
- *	Authors:
- *	Bart De Schuymer <bdschuym@pandora.be>
- *	Harald Welte <laforge@netfilter.org>
- *
- *  November, 2004
- *
- * Based on ipt_ULOG.c, which is
- * (C) 2000-2002 by Harald Welte <laforge@netfilter.org>
- *
- * This module accepts two parameters:
- *
- * nlbufsiz:
- *   The parameter specifies how big the buffer for each netlink multicast
- * group is. e.g. If you say nlbufsiz=8192, up to eight kb of packets will
- * get accumulated in the kernel until they are sent to userspace. It is
- * NOT possible to allocate more than 128kB, and it is strongly discouraged,
- * because atomically allocating 128kB inside the network rx softirq is not
- * reliable. Please also keep in mind that this buffer size is allocated for
- * each nlgroup you are using, so the total kernel memory usage increases
- * by that factor.
- *
- * flushtimeout:
- *   Specify, after how many hundredths of a second the queue should be
- *   flushed even if it is not full yet.
- *
- */
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <linux/spinlock.h>
-#include <linux/socket.h>
-#include <linux/skbuff.h>
-#include <linux/kernel.h>
-#include <linux/timer.h>
-#include <net/netlink.h>
-#include <linux/netdevice.h>
-#include <linux/netfilter/x_tables.h>
-#include <linux/netfilter_bridge/ebtables.h>
-#include <linux/netfilter_bridge/ebt_ulog.h>
-#include <net/netfilter/nf_log.h>
-#include <net/netns/generic.h>
-#include <net/sock.h>
-#include "../br_private.h"
-
-static unsigned int nlbufsiz = NLMSG_GOODSIZE;
-module_param(nlbufsiz, uint, 0600);
-MODULE_PARM_DESC(nlbufsiz, "netlink buffer size (number of bytes) "
-			   "(defaults to 4096)");
-
-static unsigned int flushtimeout = 10;
-module_param(flushtimeout, uint, 0600);
-MODULE_PARM_DESC(flushtimeout, "buffer flush timeout (hundredths ofa second) "
-			       "(defaults to 10)");
-
-typedef struct {
-	unsigned int qlen;		/* number of nlmsgs' in the skb */
-	struct nlmsghdr *lastnlh;	/* netlink header of last msg in skb */
-	struct sk_buff *skb;		/* the pre-allocated skb */
-	struct timer_list timer;	/* the timer function */
-	spinlock_t lock;		/* the per-queue lock */
-} ebt_ulog_buff_t;
-
-static int ebt_ulog_net_id __read_mostly;
-struct ebt_ulog_net {
-	unsigned int nlgroup[EBT_ULOG_MAXNLGROUPS];
-	ebt_ulog_buff_t ulog_buffers[EBT_ULOG_MAXNLGROUPS];
-	struct sock *ebtulognl;
-};
-
-static struct ebt_ulog_net *ebt_ulog_pernet(struct net *net)
-{
-	return net_generic(net, ebt_ulog_net_id);
-}
-
-/* send one ulog_buff_t to userspace */
-static void ulog_send(struct ebt_ulog_net *ebt, unsigned int nlgroup)
-{
-	ebt_ulog_buff_t *ub = &ebt->ulog_buffers[nlgroup];
-
-	del_timer(&ub->timer);
-
-	if (!ub->skb)
-		return;
-
-	/* last nlmsg needs NLMSG_DONE */
-	if (ub->qlen > 1)
-		ub->lastnlh->nlmsg_type = NLMSG_DONE;
-
-	NETLINK_CB(ub->skb).dst_group = nlgroup + 1;
-	netlink_broadcast(ebt->ebtulognl, ub->skb, 0, nlgroup + 1, GFP_ATOMIC);
-
-	ub->qlen = 0;
-	ub->skb = NULL;
-}
-
-/* timer function to flush queue in flushtimeout time */
-static void ulog_timer(unsigned long data)
-{
-	struct ebt_ulog_net *ebt = container_of((void *)data,
-						struct ebt_ulog_net,
-						nlgroup[*(unsigned int *)data]);
-
-	ebt_ulog_buff_t *ub = &ebt->ulog_buffers[*(unsigned int *)data];
-	spin_lock_bh(&ub->lock);
-	if (ub->skb)
-		ulog_send(ebt, *(unsigned int *)data);
-	spin_unlock_bh(&ub->lock);
-}
-
-static struct sk_buff *ulog_alloc_skb(unsigned int size)
-{
-	struct sk_buff *skb;
-	unsigned int n;
-
-	n = max(size, nlbufsiz);
-	skb = alloc_skb(n, GFP_ATOMIC | __GFP_NOWARN);
-	if (!skb) {
-		if (n > size) {
-			/* try to allocate only as much as we need for
-			 * current packet */
-			skb = alloc_skb(size, GFP_ATOMIC);
-			if (!skb)
-				pr_debug("cannot even allocate buffer of size %ub\n",
-					 size);
-		}
-	}
-
-	return skb;
-}
-
-static void ebt_ulog_packet(struct net *net, unsigned int hooknr,
-			    const struct sk_buff *skb,
-			    const struct net_device *in,
-			    const struct net_device *out,
-			    const struct ebt_ulog_info *uloginfo,
-			    const char *prefix)
-{
-	ebt_ulog_packet_msg_t *pm;
-	size_t size, copy_len;
-	struct nlmsghdr *nlh;
-	struct ebt_ulog_net *ebt = ebt_ulog_pernet(net);
-	unsigned int group = uloginfo->nlgroup;
-	ebt_ulog_buff_t *ub = &ebt->ulog_buffers[group];
-	spinlock_t *lock = &ub->lock;
-	ktime_t kt;
-
-	if ((uloginfo->cprange == 0) ||
-	    (uloginfo->cprange > skb->len + ETH_HLEN))
-		copy_len = skb->len + ETH_HLEN;
-	else
-		copy_len = uloginfo->cprange;
-
-	size = nlmsg_total_size(sizeof(*pm) + copy_len);
-	if (size > nlbufsiz) {
-		pr_debug("Size %Zd needed, but nlbufsiz=%d\n", size, nlbufsiz);
-		return;
-	}
-
-	spin_lock_bh(lock);
-
-	if (!ub->skb) {
-		if (!(ub->skb = ulog_alloc_skb(size)))
-			goto unlock;
-	} else if (size > skb_tailroom(ub->skb)) {
-		ulog_send(ebt, group);
-
-		if (!(ub->skb = ulog_alloc_skb(size)))
-			goto unlock;
-	}
-
-	nlh = nlmsg_put(ub->skb, 0, ub->qlen, 0,
-			size - NLMSG_ALIGN(sizeof(*nlh)), 0);
-	if (!nlh) {
-		kfree_skb(ub->skb);
-		ub->skb = NULL;
-		goto unlock;
-	}
-	ub->qlen++;
-
-	pm = nlmsg_data(nlh);
-	memset(pm, 0, sizeof(*pm));
-
-	/* Fill in the ulog data */
-	pm->version = EBT_ULOG_VERSION;
-	kt = ktime_get_real();
-	pm->stamp = ktime_to_timeval(kt);
-	if (ub->qlen == 1)
-		ub->skb->tstamp = kt;
-	pm->data_len = copy_len;
-	pm->mark = skb->mark;
-	pm->hook = hooknr;
-	if (uloginfo->prefix != NULL)
-		strcpy(pm->prefix, uloginfo->prefix);
-
-	if (in) {
-		strcpy(pm->physindev, in->name);
-		/* If in isn't a bridge, then physindev==indev */
-		if (br_port_exists(in))
-			/* rcu_read_lock()ed by nf_hook_slow */
-			strcpy(pm->indev, br_port_get_rcu(in)->br->dev->name);
-		else
-			strcpy(pm->indev, in->name);
-	}
-
-	if (out) {
-		/* If out exists, then out is a bridge port */
-		strcpy(pm->physoutdev, out->name);
-		/* rcu_read_lock()ed by nf_hook_slow */
-		strcpy(pm->outdev, br_port_get_rcu(out)->br->dev->name);
-	}
-
-	if (skb_copy_bits(skb, -ETH_HLEN, pm->data, copy_len) < 0)
-		BUG();
-
-	if (ub->qlen > 1)
-		ub->lastnlh->nlmsg_flags |= NLM_F_MULTI;
-
-	ub->lastnlh = nlh;
-
-	if (ub->qlen >= uloginfo->qthreshold)
-		ulog_send(ebt, group);
-	else if (!timer_pending(&ub->timer)) {
-		ub->timer.expires = jiffies + flushtimeout * HZ / 100;
-		add_timer(&ub->timer);
-	}
-
-unlock:
-	spin_unlock_bh(lock);
-}
-
-/* this function is registered with the netfilter core */
-static void ebt_log_packet(struct net *net, u_int8_t pf, unsigned int hooknum,
-   const struct sk_buff *skb, const struct net_device *in,
-   const struct net_device *out, const struct nf_loginfo *li,
-   const char *prefix)
-{
-	struct ebt_ulog_info loginfo;
-
-	if (!li || li->type != NF_LOG_TYPE_ULOG) {
-		loginfo.nlgroup = EBT_ULOG_DEFAULT_NLGROUP;
-		loginfo.cprange = 0;
-		loginfo.qthreshold = EBT_ULOG_DEFAULT_QTHRESHOLD;
-		loginfo.prefix[0] = '\0';
-	} else {
-		loginfo.nlgroup = li->u.ulog.group;
-		loginfo.cprange = li->u.ulog.copy_len;
-		loginfo.qthreshold = li->u.ulog.qthreshold;
-		strlcpy(loginfo.prefix, prefix, sizeof(loginfo.prefix));
-	}
-
-	ebt_ulog_packet(net, hooknum, skb, in, out, &loginfo, prefix);
-}
-
-static unsigned int
-ebt_ulog_tg(struct sk_buff *skb, const struct xt_action_param *par)
-{
-	struct net *net = dev_net(par->in ? par->in : par->out);
-
-	ebt_ulog_packet(net, par->hooknum, skb, par->in, par->out,
-	                par->targinfo, NULL);
-	return EBT_CONTINUE;
-}
-
-static int ebt_ulog_tg_check(const struct xt_tgchk_param *par)
-{
-	struct ebt_ulog_info *uloginfo = par->targinfo;
-
-	if (!par->net->xt.ebt_ulog_warn_deprecated) {
-		pr_info("ebt_ulog is deprecated and it will be removed soon, "
-			"use ebt_nflog instead\n");
-		par->net->xt.ebt_ulog_warn_deprecated = true;
-	}
-
-	if (uloginfo->nlgroup > 31)
-		return -EINVAL;
-
-	uloginfo->prefix[EBT_ULOG_PREFIX_LEN - 1] = '\0';
-
-	if (uloginfo->qthreshold > EBT_ULOG_MAX_QLEN)
-		uloginfo->qthreshold = EBT_ULOG_MAX_QLEN;
-
-	return 0;
-}
-
-static struct xt_target ebt_ulog_tg_reg __read_mostly = {
-	.name		= "ulog",
-	.revision	= 0,
-	.family		= NFPROTO_BRIDGE,
-	.target		= ebt_ulog_tg,
-	.checkentry	= ebt_ulog_tg_check,
-	.targetsize	= sizeof(struct ebt_ulog_info),
-	.me		= THIS_MODULE,
-};
-
-static struct nf_logger ebt_ulog_logger __read_mostly = {
-	.name		= "ebt_ulog",
-	.logfn		= &ebt_log_packet,
-	.me		= THIS_MODULE,
-};
-
-static int __net_init ebt_ulog_net_init(struct net *net)
-{
-	int i;
-	struct ebt_ulog_net *ebt = ebt_ulog_pernet(net);
-
-	struct netlink_kernel_cfg cfg = {
-		.groups	= EBT_ULOG_MAXNLGROUPS,
-	};
-
-	/* initialize ulog_buffers */
-	for (i = 0; i < EBT_ULOG_MAXNLGROUPS; i++) {
-		ebt->nlgroup[i] = i;
-		setup_timer(&ebt->ulog_buffers[i].timer, ulog_timer,
-			    (unsigned long)&ebt->nlgroup[i]);
-		spin_lock_init(&ebt->ulog_buffers[i].lock);
-	}
-
-	ebt->ebtulognl = netlink_kernel_create(net, NETLINK_NFLOG, &cfg);
-	if (!ebt->ebtulognl)
-		return -ENOMEM;
-
-	nf_log_set(net, NFPROTO_BRIDGE, &ebt_ulog_logger);
-	return 0;
-}
-
-static void __net_exit ebt_ulog_net_fini(struct net *net)
-{
-	int i;
-	struct ebt_ulog_net *ebt = ebt_ulog_pernet(net);
-
-	nf_log_unset(net, &ebt_ulog_logger);
-	for (i = 0; i < EBT_ULOG_MAXNLGROUPS; i++) {
-		ebt_ulog_buff_t *ub = &ebt->ulog_buffers[i];
-		del_timer(&ub->timer);
-
-		if (ub->skb) {
-			kfree_skb(ub->skb);
-			ub->skb = NULL;
-		}
-	}
-	netlink_kernel_release(ebt->ebtulognl);
-}
-
-static struct pernet_operations ebt_ulog_net_ops = {
-	.init = ebt_ulog_net_init,
-	.exit = ebt_ulog_net_fini,
-	.id   = &ebt_ulog_net_id,
-	.size = sizeof(struct ebt_ulog_net),
-};
-
-static int __init ebt_ulog_init(void)
-{
-	int ret;
-
-	if (nlbufsiz >= 128*1024) {
-		pr_warn("Netlink buffer has to be <= 128kB,"
-			"please try a smaller nlbufsiz parameter.\n");
-		return -EINVAL;
-	}
-
-	ret = register_pernet_subsys(&ebt_ulog_net_ops);
-	if (ret)
-		goto out_pernet;
-
-	ret = xt_register_target(&ebt_ulog_tg_reg);
-	if (ret)
-		goto out_target;
-
-	nf_log_register(NFPROTO_BRIDGE, &ebt_ulog_logger);
-
-	return 0;
-
-out_target:
-	unregister_pernet_subsys(&ebt_ulog_net_ops);
-out_pernet:
-	return ret;
-}
-
-static void __exit ebt_ulog_fini(void)
-{
-	nf_log_unregister(&ebt_ulog_logger);
-	xt_unregister_target(&ebt_ulog_tg_reg);
-	unregister_pernet_subsys(&ebt_ulog_net_ops);
-}
-
-module_init(ebt_ulog_init);
-module_exit(ebt_ulog_fini);
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Bart De Schuymer <bdschuym@pandora.be>");
-MODULE_DESCRIPTION("Ebtables: Packet logging to netlink using ULOG");
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index a26ce035e3fa..730faacbc5f8 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -159,25 +159,6 @@ config IP_NF_TARGET_SYNPROXY
 
 	  To compile it as a module, choose M here. If unsure, say N.
 
-config IP_NF_TARGET_ULOG
-	tristate "ULOG target support (obsolete)"
-	default m if NETFILTER_ADVANCED=n
-	---help---
-
-	  This option enables the old IPv4-only "ipt_ULOG" implementation
-	  which has been obsoleted by the new "nfnetlink_log" code (see
-	  CONFIG_NETFILTER_NETLINK_LOG).
-
-	  This option adds a `ULOG' target, which allows you to create rules in
-	  any iptables table. The packet is passed to a userspace logging
-	  daemon using netlink multicast sockets; unlike the LOG target
-	  which can only be viewed through syslog.
-
-	  The appropriate userspace logging daemon (ulogd) may be obtained from
-	  <http://www.netfilter.org/projects/ulogd/index.html>
-
-	  To compile it as a module, choose M here.  If unsure, say N.
-
 # NAT + specific targets: nf_conntrack
 config NF_NAT_IPV4
 	tristate "IPv4 NAT"
diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c
deleted file mode 100644
index 9cb993cd224b..000000000000
--- a/net/ipv4/netfilter/ipt_ULOG.c
+++ /dev/null
@@ -1,498 +0,0 @@
-/*
- * netfilter module for userspace packet logging daemons
- *
- * (C) 2000-2004 by Harald Welte <laforge@netfilter.org>
- * (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
- * (C) 2005-2007 Patrick McHardy <kaber@trash.net>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This module accepts two parameters:
- *
- * nlbufsiz:
- *   The parameter specifies how big the buffer for each netlink multicast
- * group is. e.g. If you say nlbufsiz=8192, up to eight kb of packets will
- * get accumulated in the kernel until they are sent to userspace. It is
- * NOT possible to allocate more than 128kB, and it is strongly discouraged,
- * because atomically allocating 128kB inside the network rx softirq is not
- * reliable. Please also keep in mind that this buffer size is allocated for
- * each nlgroup you are using, so the total kernel memory usage increases
- * by that factor.
- *
- * Actually you should use nlbufsiz a bit smaller than PAGE_SIZE, since
- * nlbufsiz is used with alloc_skb, which adds another
- * sizeof(struct skb_shared_info).  Use NLMSG_GOODSIZE instead.
- *
- * flushtimeout:
- *   Specify, after how many hundredths of a second the queue should be
- *   flushed even if it is not full yet.
- */
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-#include <linux/module.h>
-#include <linux/spinlock.h>
-#include <linux/socket.h>
-#include <linux/slab.h>
-#include <linux/skbuff.h>
-#include <linux/kernel.h>
-#include <linux/timer.h>
-#include <net/netlink.h>
-#include <linux/netdevice.h>
-#include <linux/mm.h>
-#include <linux/moduleparam.h>
-#include <linux/netfilter.h>
-#include <linux/netfilter/x_tables.h>
-#include <linux/netfilter_ipv4/ipt_ULOG.h>
-#include <net/netfilter/nf_log.h>
-#include <net/netns/generic.h>
-#include <net/sock.h>
-#include <linux/bitops.h>
-#include <asm/unaligned.h>
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>");
-MODULE_DESCRIPTION("Xtables: packet logging to netlink using ULOG");
-MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_NFLOG);
-
-#define ULOG_NL_EVENT		111		/* Harald's favorite number */
-#define ULOG_MAXNLGROUPS	32		/* numer of nlgroups */
-
-static unsigned int nlbufsiz = NLMSG_GOODSIZE;
-module_param(nlbufsiz, uint, 0400);
-MODULE_PARM_DESC(nlbufsiz, "netlink buffer size");
-
-static unsigned int flushtimeout = 10;
-module_param(flushtimeout, uint, 0600);
-MODULE_PARM_DESC(flushtimeout, "buffer flush timeout (hundredths of a second)");
-
-static bool nflog = true;
-module_param(nflog, bool, 0400);
-MODULE_PARM_DESC(nflog, "register as internal netfilter logging module");
-
-/* global data structures */
-
-typedef struct {
-	unsigned int qlen;		/* number of nlmsgs' in the skb */
-	struct nlmsghdr *lastnlh;	/* netlink header of last msg in skb */
-	struct sk_buff *skb;		/* the pre-allocated skb */
-	struct timer_list timer;	/* the timer function */
-} ulog_buff_t;
-
-static int ulog_net_id __read_mostly;
-struct ulog_net {
-	unsigned int nlgroup[ULOG_MAXNLGROUPS];
-	ulog_buff_t ulog_buffers[ULOG_MAXNLGROUPS];
-	struct sock *nflognl;
-	spinlock_t lock;
-};
-
-static struct ulog_net *ulog_pernet(struct net *net)
-{
-	return net_generic(net, ulog_net_id);
-}
-
-/* send one ulog_buff_t to userspace */
-static void ulog_send(struct ulog_net *ulog, unsigned int nlgroupnum)
-{
-	ulog_buff_t *ub = &ulog->ulog_buffers[nlgroupnum];
-
-	pr_debug("ulog_send: timer is deleting\n");
-	del_timer(&ub->timer);
-
-	if (!ub->skb) {
-		pr_debug("ulog_send: nothing to send\n");
-		return;
-	}
-
-	/* last nlmsg needs NLMSG_DONE */
-	if (ub->qlen > 1)
-		ub->lastnlh->nlmsg_type = NLMSG_DONE;
-
-	NETLINK_CB(ub->skb).dst_group = nlgroupnum + 1;
-	pr_debug("throwing %d packets to netlink group %u\n",
-		 ub->qlen, nlgroupnum + 1);
-	netlink_broadcast(ulog->nflognl, ub->skb, 0, nlgroupnum + 1,
-			  GFP_ATOMIC);
-
-	ub->qlen = 0;
-	ub->skb = NULL;
-	ub->lastnlh = NULL;
-}
-
-
-/* timer function to flush queue in flushtimeout time */
-static void ulog_timer(unsigned long data)
-{
-	unsigned int groupnum = *((unsigned int *)data);
-	struct ulog_net *ulog = container_of((void *)data,
-					     struct ulog_net,
-					     nlgroup[groupnum]);
-	pr_debug("timer function called, calling ulog_send\n");
-
-	/* lock to protect against somebody modifying our structure
-	 * from ipt_ulog_target at the same time */
-	spin_lock_bh(&ulog->lock);
-	ulog_send(ulog, groupnum);
-	spin_unlock_bh(&ulog->lock);
-}
-
-static struct sk_buff *ulog_alloc_skb(unsigned int size)
-{
-	struct sk_buff *skb;
-	unsigned int n;
-
-	/* alloc skb which should be big enough for a whole
-	 * multipart message. WARNING: has to be <= 131000
-	 * due to slab allocator restrictions */
-
-	n = max(size, nlbufsiz);
-	skb = alloc_skb(n, GFP_ATOMIC | __GFP_NOWARN);
-	if (!skb) {
-		if (n > size) {
-			/* try to allocate only as much as we need for
-			 * current packet */
-
-			skb = alloc_skb(size, GFP_ATOMIC);
-			if (!skb)
-				pr_debug("cannot even allocate %ub\n", size);
-		}
-	}
-
-	return skb;
-}
-
-static void ipt_ulog_packet(struct net *net,
-			    unsigned int hooknum,
-			    const struct sk_buff *skb,
-			    const struct net_device *in,
-			    const struct net_device *out,
-			    const struct ipt_ulog_info *loginfo,
-			    const char *prefix)
-{
-	ulog_buff_t *ub;
-	ulog_packet_msg_t *pm;
-	size_t size, copy_len;
-	struct nlmsghdr *nlh;
-	struct timeval tv;
-	struct ulog_net *ulog = ulog_pernet(net);
-
-	/* ffs == find first bit set, necessary because userspace
-	 * is already shifting groupnumber, but we need unshifted.
-	 * ffs() returns [1..32], we need [0..31] */
-	unsigned int groupnum = ffs(loginfo->nl_group) - 1;
-
-	/* calculate the size of the skb needed */
-	if (loginfo->copy_range == 0 || loginfo->copy_range > skb->len)
-		copy_len = skb->len;
-	else
-		copy_len = loginfo->copy_range;
-
-	size = nlmsg_total_size(sizeof(*pm) + copy_len);
-
-	ub = &ulog->ulog_buffers[groupnum];
-
-	spin_lock_bh(&ulog->lock);
-
-	if (!ub->skb) {
-		if (!(ub->skb = ulog_alloc_skb(size)))
-			goto alloc_failure;
-	} else if (ub->qlen >= loginfo->qthreshold ||
-		   size > skb_tailroom(ub->skb)) {
-		/* either the queue len is too high or we don't have
-		 * enough room in nlskb left. send it to userspace. */
-
-		ulog_send(ulog, groupnum);
-
-		if (!(ub->skb = ulog_alloc_skb(size)))
-			goto alloc_failure;
-	}
-
-	pr_debug("qlen %d, qthreshold %Zu\n", ub->qlen, loginfo->qthreshold);
-
-	nlh = nlmsg_put(ub->skb, 0, ub->qlen, ULOG_NL_EVENT,
-			sizeof(*pm)+copy_len, 0);
-	if (!nlh) {
-		pr_debug("error during nlmsg_put\n");
-		goto out_unlock;
-	}
-	ub->qlen++;
-
-	pm = nlmsg_data(nlh);
-	memset(pm, 0, sizeof(*pm));
-
-	/* We might not have a timestamp, get one */
-	if (skb->tstamp.tv64 == 0)
-		__net_timestamp((struct sk_buff *)skb);
-
-	/* copy hook, prefix, timestamp, payload, etc. */
-	pm->data_len = copy_len;
-	tv = ktime_to_timeval(skb->tstamp);
-	put_unaligned(tv.tv_sec, &pm->timestamp_sec);
-	put_unaligned(tv.tv_usec, &pm->timestamp_usec);
-	put_unaligned(skb->mark, &pm->mark);
-	pm->hook = hooknum;
-	if (prefix != NULL) {
-		strncpy(pm->prefix, prefix, sizeof(pm->prefix) - 1);
-		pm->prefix[sizeof(pm->prefix) - 1] = '\0';
-	}
-	else if (loginfo->prefix[0] != '\0')
-		strncpy(pm->prefix, loginfo->prefix, sizeof(pm->prefix));
-
-	if (in && in->hard_header_len > 0 &&
-	    skb->mac_header != skb->network_header &&
-	    in->hard_header_len <= ULOG_MAC_LEN) {
-		memcpy(pm->mac, skb_mac_header(skb), in->hard_header_len);
-		pm->mac_len = in->hard_header_len;
-	} else
-		pm->mac_len = 0;
-
-	if (in)
-		strncpy(pm->indev_name, in->name, sizeof(pm->indev_name));
-
-	if (out)
-		strncpy(pm->outdev_name, out->name, sizeof(pm->outdev_name));
-
-	/* copy_len <= skb->len, so can't fail. */
-	if (skb_copy_bits(skb, 0, pm->payload, copy_len) < 0)
-		BUG();
-
-	/* check if we are building multi-part messages */
-	if (ub->qlen > 1)
-		ub->lastnlh->nlmsg_flags |= NLM_F_MULTI;
-
-	ub->lastnlh = nlh;
-
-	/* if timer isn't already running, start it */
-	if (!timer_pending(&ub->timer)) {
-		ub->timer.expires = jiffies + flushtimeout * HZ / 100;
-		add_timer(&ub->timer);
-	}
-
-	/* if threshold is reached, send message to userspace */
-	if (ub->qlen >= loginfo->qthreshold) {
-		if (loginfo->qthreshold > 1)
-			nlh->nlmsg_type = NLMSG_DONE;
-		ulog_send(ulog, groupnum);
-	}
-out_unlock:
-	spin_unlock_bh(&ulog->lock);
-
-	return;
-
-alloc_failure:
-	pr_debug("Error building netlink message\n");
-	spin_unlock_bh(&ulog->lock);
-}
-
-static unsigned int
-ulog_tg(struct sk_buff *skb, const struct xt_action_param *par)
-{
-	struct net *net = dev_net(par->in ? par->in : par->out);
-
-	ipt_ulog_packet(net, par->hooknum, skb, par->in, par->out,
-	                par->targinfo, NULL);
-	return XT_CONTINUE;
-}
-
-static void ipt_logfn(struct net *net,
-		      u_int8_t pf,
-		      unsigned int hooknum,
-		      const struct sk_buff *skb,
-		      const struct net_device *in,
-		      const struct net_device *out,
-		      const struct nf_loginfo *li,
-		      const char *prefix)
-{
-	struct ipt_ulog_info loginfo;
-
-	if (!li || li->type != NF_LOG_TYPE_ULOG) {
-		loginfo.nl_group = ULOG_DEFAULT_NLGROUP;
-		loginfo.copy_range = 0;
-		loginfo.qthreshold = ULOG_DEFAULT_QTHRESHOLD;
-		loginfo.prefix[0] = '\0';
-	} else {
-		loginfo.nl_group = li->u.ulog.group;
-		loginfo.copy_range = li->u.ulog.copy_len;
-		loginfo.qthreshold = li->u.ulog.qthreshold;
-		strlcpy(loginfo.prefix, prefix, sizeof(loginfo.prefix));
-	}
-
-	ipt_ulog_packet(net, hooknum, skb, in, out, &loginfo, prefix);
-}
-
-static int ulog_tg_check(const struct xt_tgchk_param *par)
-{
-	const struct ipt_ulog_info *loginfo = par->targinfo;
-
-	if (!par->net->xt.ulog_warn_deprecated) {
-		pr_info("ULOG is deprecated and it will be removed soon, "
-			"use NFLOG instead\n");
-		par->net->xt.ulog_warn_deprecated = true;
-	}
-
-	if (loginfo->prefix[sizeof(loginfo->prefix) - 1] != '\0') {
-		pr_debug("prefix not null-terminated\n");
-		return -EINVAL;
-	}
-	if (loginfo->qthreshold > ULOG_MAX_QLEN) {
-		pr_debug("queue threshold %Zu > MAX_QLEN\n",
-			 loginfo->qthreshold);
-		return -EINVAL;
-	}
-	return 0;
-}
-
-#ifdef CONFIG_COMPAT
-struct compat_ipt_ulog_info {
-	compat_uint_t	nl_group;
-	compat_size_t	copy_range;
-	compat_size_t	qthreshold;
-	char		prefix[ULOG_PREFIX_LEN];
-};
-
-static void ulog_tg_compat_from_user(void *dst, const void *src)
-{
-	const struct compat_ipt_ulog_info *cl = src;
-	struct ipt_ulog_info l = {
-		.nl_group	= cl->nl_group,
-		.copy_range	= cl->copy_range,
-		.qthreshold	= cl->qthreshold,
-	};
-
-	memcpy(l.prefix, cl->prefix, sizeof(l.prefix));
-	memcpy(dst, &l, sizeof(l));
-}
-
-static int ulog_tg_compat_to_user(void __user *dst, const void *src)
-{
-	const struct ipt_ulog_info *l = src;
-	struct compat_ipt_ulog_info cl = {
-		.nl_group	= l->nl_group,
-		.copy_range	= l->copy_range,
-		.qthreshold	= l->qthreshold,
-	};
-
-	memcpy(cl.prefix, l->prefix, sizeof(cl.prefix));
-	return copy_to_user(dst, &cl, sizeof(cl)) ? -EFAULT : 0;
-}
-#endif /* CONFIG_COMPAT */
-
-static struct xt_target ulog_tg_reg __read_mostly = {
-	.name		= "ULOG",
-	.family		= NFPROTO_IPV4,
-	.target		= ulog_tg,
-	.targetsize	= sizeof(struct ipt_ulog_info),
-	.checkentry	= ulog_tg_check,
-#ifdef CONFIG_COMPAT
-	.compatsize	= sizeof(struct compat_ipt_ulog_info),
-	.compat_from_user = ulog_tg_compat_from_user,
-	.compat_to_user	= ulog_tg_compat_to_user,
-#endif
-	.me		= THIS_MODULE,
-};
-
-static struct nf_logger ipt_ulog_logger __read_mostly = {
-	.name		= "ipt_ULOG",
-	.logfn		= ipt_logfn,
-	.me		= THIS_MODULE,
-};
-
-static int __net_init ulog_tg_net_init(struct net *net)
-{
-	int i;
-	struct ulog_net *ulog = ulog_pernet(net);
-	struct netlink_kernel_cfg cfg = {
-		.groups	= ULOG_MAXNLGROUPS,
-	};
-
-	spin_lock_init(&ulog->lock);
-	/* initialize ulog_buffers */
-	for (i = 0; i < ULOG_MAXNLGROUPS; i++) {
-		ulog->nlgroup[i] = i;
-		setup_timer(&ulog->ulog_buffers[i].timer, ulog_timer,
-			    (unsigned long)&ulog->nlgroup[i]);
-	}
-
-	ulog->nflognl = netlink_kernel_create(net, NETLINK_NFLOG, &cfg);
-	if (!ulog->nflognl)
-		return -ENOMEM;
-
-	if (nflog)
-		nf_log_set(net, NFPROTO_IPV4, &ipt_ulog_logger);
-
-	return 0;
-}
-
-static void __net_exit ulog_tg_net_exit(struct net *net)
-{
-	ulog_buff_t *ub;
-	int i;
-	struct ulog_net *ulog = ulog_pernet(net);
-
-	if (nflog)
-		nf_log_unset(net, &ipt_ulog_logger);
-
-	netlink_kernel_release(ulog->nflognl);
-
-	/* remove pending timers and free allocated skb's */
-	for (i = 0; i < ULOG_MAXNLGROUPS; i++) {
-		ub = &ulog->ulog_buffers[i];
-		pr_debug("timer is deleting\n");
-		del_timer(&ub->timer);
-
-		if (ub->skb) {
-			kfree_skb(ub->skb);
-			ub->skb = NULL;
-		}
-	}
-}
-
-static struct pernet_operations ulog_tg_net_ops = {
-	.init = ulog_tg_net_init,
-	.exit = ulog_tg_net_exit,
-	.id   = &ulog_net_id,
-	.size = sizeof(struct ulog_net),
-};
-
-static int __init ulog_tg_init(void)
-{
-	int ret;
-	pr_debug("init module\n");
-
-	if (nlbufsiz > 128*1024) {
-		pr_warn("Netlink buffer has to be <= 128kB\n");
-		return -EINVAL;
-	}
-
-	ret = register_pernet_subsys(&ulog_tg_net_ops);
-	if (ret)
-		goto out_pernet;
-
-	ret = xt_register_target(&ulog_tg_reg);
-	if (ret < 0)
-		goto out_target;
-
-	if (nflog)
-		nf_log_register(NFPROTO_IPV4, &ipt_ulog_logger);
-
-	return 0;
-
-out_target:
-	unregister_pernet_subsys(&ulog_tg_net_ops);
-out_pernet:
-	return ret;
-}
-
-static void __exit ulog_tg_exit(void)
-{
-	pr_debug("cleanup_module\n");
-	if (nflog)
-		nf_log_unregister(&ipt_ulog_logger);
-	xt_unregister_target(&ulog_tg_reg);
-	unregister_pernet_subsys(&ulog_tg_net_ops);
-}
-
-module_init(ulog_tg_init);
-module_exit(ulog_tg_exit);
-- 
cgit v1.2.3


From a6eacef7fba7834da4d22762ea0d8524df3993a8 Mon Sep 17 00:00:00 2001
From: Erik Hugne <erik.hugne@ericsson.com>
Date: Wed, 25 Jun 2014 10:07:05 +0200
Subject: tipc: bump max configurable window size

The maximum window size is limited by the sequence gap field, which
was expanded with bd7845337b105e090dd18912d511139945fa7586
("tipc: Expand link sequence gap field to 13 bits")
We remove the artificial limit that prevents the link window to be
set larger than 150.

Signed-off-by: Erik Hugne <erik.hugne@ericsson.com>
Reviewed-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/tipc_config.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/tipc_config.h b/include/uapi/linux/tipc_config.h
index 41a76acbb305..876d0a14863c 100644
--- a/include/uapi/linux/tipc_config.h
+++ b/include/uapi/linux/tipc_config.h
@@ -182,7 +182,7 @@
 
 #define TIPC_MIN_LINK_WIN 16
 #define TIPC_DEF_LINK_WIN 50
-#define TIPC_MAX_LINK_WIN 150
+#define TIPC_MAX_LINK_WIN 8191
 
 
 struct tipc_node_info {
-- 
cgit v1.2.3


From 09d27b88f15f08fcfbaf57d9b0b4489816264815 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Wed, 25 Jun 2014 13:37:13 +0200
Subject: netfilter: nft_log: complete logging support

Use the unified nf_log_packet() interface that allows us explicit
logger selection through the nf_loginfo structure.

If you specify the group attribute, this means you want to receive
logging messages through nfnetlink_log. In that case, the snaplen
and qthreshold attributes allows you to tune internal aspects of
the netlink logging infrastructure.

On the other hand, if the level is specified, then the plain text
format through the kernel logging ring is used instead, which is
also used by default if neither group nor level are indicated.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/uapi/linux/netfilter/nf_tables.h |  4 ++
 net/netfilter/nft_log.c                  | 76 +++++++++++++++++++++++++-------
 2 files changed, 63 insertions(+), 17 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index 2a88f645a5d8..801bdd1e56e3 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -697,6 +697,8 @@ enum nft_counter_attributes {
  * @NFTA_LOG_PREFIX: prefix to prepend to log messages (NLA_STRING)
  * @NFTA_LOG_SNAPLEN: length of payload to include in netlink message (NLA_U32)
  * @NFTA_LOG_QTHRESHOLD: queue threshold (NLA_U32)
+ * @NFTA_LOG_LEVEL: log level (NLA_U32)
+ * @NFTA_LOG_FLAGS: logging flags (NLA_U32)
  */
 enum nft_log_attributes {
 	NFTA_LOG_UNSPEC,
@@ -704,6 +706,8 @@ enum nft_log_attributes {
 	NFTA_LOG_PREFIX,
 	NFTA_LOG_SNAPLEN,
 	NFTA_LOG_QTHRESHOLD,
+	NFTA_LOG_LEVEL,
+	NFTA_LOG_FLAGS,
 	__NFTA_LOG_MAX
 };
 #define NFTA_LOG_MAX		(__NFTA_LOG_MAX - 1)
diff --git a/net/netfilter/nft_log.c b/net/netfilter/nft_log.c
index 55d4297d8417..5b1a4f5c3dcc 100644
--- a/net/netfilter/nft_log.c
+++ b/net/netfilter/nft_log.c
@@ -1,5 +1,6 @@
 /*
  * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
+ * Copyright (c) 2012-2014 Pablo Neira Ayuso <pablo@netfilter.org>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -41,6 +42,8 @@ static const struct nla_policy nft_log_policy[NFTA_LOG_MAX + 1] = {
 	[NFTA_LOG_PREFIX]	= { .type = NLA_STRING },
 	[NFTA_LOG_SNAPLEN]	= { .type = NLA_U32 },
 	[NFTA_LOG_QTHRESHOLD]	= { .type = NLA_U16 },
+	[NFTA_LOG_LEVEL]	= { .type = NLA_U32 },
+	[NFTA_LOG_FLAGS]	= { .type = NLA_U32 },
 };
 
 static int nft_log_init(const struct nft_ctx *ctx,
@@ -58,18 +61,41 @@ static int nft_log_init(const struct nft_ctx *ctx,
 		if (priv->prefix == NULL)
 			return -ENOMEM;
 		nla_strlcpy(priv->prefix, nla, nla_len(nla) + 1);
-	} else
+	} else {
 		priv->prefix = (char *)nft_log_null_prefix;
+	}
 
-	li->type = NF_LOG_TYPE_ULOG;
+	li->type = NF_LOG_TYPE_LOG;
+	if (tb[NFTA_LOG_LEVEL] != NULL &&
+	    tb[NFTA_LOG_GROUP] != NULL)
+		return -EINVAL;
 	if (tb[NFTA_LOG_GROUP] != NULL)
+		li->type = NF_LOG_TYPE_ULOG;
+
+	switch (li->type) {
+	case NF_LOG_TYPE_LOG:
+		if (tb[NFTA_LOG_LEVEL] != NULL) {
+			li->u.log.level =
+				ntohl(nla_get_be32(tb[NFTA_LOG_LEVEL]));;
+		} else {
+			li->u.log.level = 4;
+		}
+		if (tb[NFTA_LOG_FLAGS] != NULL) {
+			li->u.log.logflags =
+				ntohl(nla_get_be32(tb[NFTA_LOG_FLAGS]));
+		}
+		break;
+	case NF_LOG_TYPE_ULOG:
 		li->u.ulog.group = ntohs(nla_get_be16(tb[NFTA_LOG_GROUP]));
-
-	if (tb[NFTA_LOG_SNAPLEN] != NULL)
-		li->u.ulog.copy_len = ntohl(nla_get_be32(tb[NFTA_LOG_SNAPLEN]));
-	if (tb[NFTA_LOG_QTHRESHOLD] != NULL) {
-		li->u.ulog.qthreshold =
-			ntohs(nla_get_be16(tb[NFTA_LOG_QTHRESHOLD]));
+		if (tb[NFTA_LOG_SNAPLEN] != NULL) {
+			li->u.ulog.copy_len =
+				ntohl(nla_get_be32(tb[NFTA_LOG_SNAPLEN]));
+		}
+		if (tb[NFTA_LOG_QTHRESHOLD] != NULL) {
+			li->u.ulog.qthreshold =
+				ntohs(nla_get_be16(tb[NFTA_LOG_QTHRESHOLD]));
+		}
+		break;
 	}
 
 	if (ctx->afi->family == NFPROTO_INET) {
@@ -113,17 +139,33 @@ static int nft_log_dump(struct sk_buff *skb, const struct nft_expr *expr)
 	if (priv->prefix != nft_log_null_prefix)
 		if (nla_put_string(skb, NFTA_LOG_PREFIX, priv->prefix))
 			goto nla_put_failure;
-	if (li->u.ulog.group)
-		if (nla_put_be16(skb, NFTA_LOG_GROUP, htons(li->u.ulog.group)))
-			goto nla_put_failure;
-	if (li->u.ulog.copy_len)
-		if (nla_put_be32(skb, NFTA_LOG_SNAPLEN,
-				 htonl(li->u.ulog.copy_len)))
+	switch (li->type) {
+	case NF_LOG_TYPE_LOG:
+		if (nla_put_be32(skb, NFTA_LOG_LEVEL, htonl(li->u.log.level)))
 			goto nla_put_failure;
-	if (li->u.ulog.qthreshold)
-		if (nla_put_be16(skb, NFTA_LOG_QTHRESHOLD,
-				 htons(li->u.ulog.qthreshold)))
+
+		if (li->u.log.logflags) {
+			if (nla_put_be32(skb, NFTA_LOG_FLAGS,
+					 htonl(li->u.log.logflags)))
+				goto nla_put_failure;
+		}
+		break;
+	case NF_LOG_TYPE_ULOG:
+		if (nla_put_be16(skb, NFTA_LOG_GROUP, htons(li->u.ulog.group)))
 			goto nla_put_failure;
+
+		if (li->u.ulog.copy_len) {
+			if (nla_put_be32(skb, NFTA_LOG_SNAPLEN,
+					 htonl(li->u.ulog.copy_len)))
+				goto nla_put_failure;
+		}
+		if (li->u.ulog.qthreshold) {
+			if (nla_put_be16(skb, NFTA_LOG_QTHRESHOLD,
+					 htons(li->u.ulog.qthreshold)))
+				goto nla_put_failure;
+		}
+		break;
+	}
 	return 0;
 
 nla_put_failure:
-- 
cgit v1.2.3


From 9ad7860450ea65c6bbcbd52a9a25b54b07e35941 Mon Sep 17 00:00:00 2001
From: Felipe Balbi <balbi@ti.com>
Date: Fri, 27 Jun 2014 10:41:00 -0500
Subject: Revert "tools: ffs-test: convert to new descriptor format fixing
 compilation error"

This reverts commit f2af74123f8c5a735248547f4286a3adc28633c1.

There is a better fix for this build error coming in a following
patch.

Signed-of-by: Felipe Balbi <balbi@ti.com>
---
 include/uapi/linux/usb/functionfs.h |  2 +-
 tools/usb/Makefile                  |  6 +-----
 tools/usb/ffs-test.c                | 20 ++------------------
 3 files changed, 4 insertions(+), 24 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/usb/functionfs.h b/include/uapi/linux/usb/functionfs.h
index ecb3a31f7ca6..2a4b4a72a4f9 100644
--- a/include/uapi/linux/usb/functionfs.h
+++ b/include/uapi/linux/usb/functionfs.h
@@ -53,7 +53,7 @@ struct usb_endpoint_descriptor_no_audio {
  * structure.  Any flags that are not recognised cause the whole block to be
  * rejected with -ENOSYS.
  *
- * Legacy descriptors format (deprecated as of 3.14):
+ * Legacy descriptors format:
  *
  * | off | name      | type         | description                          |
  * |-----+-----------+--------------+--------------------------------------|
diff --git a/tools/usb/Makefile b/tools/usb/Makefile
index d576b3bac3cf..acf2165c04e6 100644
--- a/tools/usb/Makefile
+++ b/tools/usb/Makefile
@@ -6,11 +6,7 @@ WARNINGS = -Wall -Wextra
 CFLAGS = $(WARNINGS) -g -I../include
 LDFLAGS = $(PTHREAD_LIBS)
 
-all: testusb ffs-test ffs-test-legacy
-
-ffs-test-legacy: ffs-test.c
-	$(CC) $(CFLAGS) -o $@ $^ $(LDFLAGS) -DUSE_LEGACY_DESC_HEAD
-
+all: testusb ffs-test
 %: %.c
 	$(CC) $(CFLAGS) -o $@ $^ $(LDFLAGS)
 
diff --git a/tools/usb/ffs-test.c b/tools/usb/ffs-test.c
index 74b353d9eb50..fe1e66b6ef40 100644
--- a/tools/usb/ffs-test.c
+++ b/tools/usb/ffs-test.c
@@ -1,5 +1,5 @@
 /*
- * ffs-test.c -- user mode filesystem api for usb composite function
+ * ffs-test.c.c -- user mode filesystem api for usb composite function
  *
  * Copyright (C) 2010 Samsung Electronics
  *                    Author: Michal Nazarewicz <mina86@mina86.com>
@@ -21,8 +21,6 @@
 
 /* $(CROSS_COMPILE)cc -Wall -Wextra -g -o ffs-test ffs-test.c -lpthread */
 
-/* Uncomment to make the tool use legacy FFS descriptor headers. */
-/* #define USE_LEGACY_DESC_HEAD */
 
 #define _BSD_SOURCE /* for endian.h */
 
@@ -108,15 +106,7 @@ static void _msg(unsigned level, const char *fmt, ...)
 /******************** Descriptors and Strings *******************************/
 
 static const struct {
-	struct {
-		__le32 magic;
-		__le32 length;
-#ifndef USE_LEGACY_DESC_HEAD
-		__le32 flags;
-#endif
-		__le32 fs_count;
-		__le32 hs_count;
-	} __attribute__((packed)) header;
+	struct usb_functionfs_descs_head header;
 	struct {
 		struct usb_interface_descriptor intf;
 		struct usb_endpoint_descriptor_no_audio sink;
@@ -124,13 +114,7 @@ static const struct {
 	} __attribute__((packed)) fs_descs, hs_descs;
 } __attribute__((packed)) descriptors = {
 	.header = {
-#ifdef USE_LEGACY_DESC_HEAD
 		.magic = cpu_to_le32(FUNCTIONFS_DESCRIPTORS_MAGIC),
-#else
-		.magic = cpu_to_le32(FUNCTIONFS_DESCRIPTORS_MAGIC_V2),
-		.flags = cpu_to_le32(FUNCTIONFS_HAS_FS_DESC |
-				     FUNCTIONFS_HAS_HS_DESC),
-#endif
 		.length = cpu_to_le32(sizeof descriptors),
 		.fs_count = 3,
 		.hs_count = 3,
-- 
cgit v1.2.3


From 09122141785348bf9539762a5f5dbbae3761c783 Mon Sep 17 00:00:00 2001
From: Michal Nazarewicz <mina86@mina86.com>
Date: Fri, 13 Jun 2014 15:38:04 +0200
Subject: usb: gadget: f_fs: resurect usb_functionfs_descs_head structure
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Even though usb_functionfs_descs_head structure is now deprecated,
it has been used by some user space tools.  Its removel in commit
[ac8dde1: “Add flags to descriptors block”] was an oversight
leading to build breakage for such tools.

Bring it back so that old user space tools can still be build
without problems on newer kernel versions.

Cc: <stable@vger.kernel.org>  # 3.14
Reported-by: Lad, Prabhakar <prabhakar.csengg@gmail.com>
Reported-by: Krzysztof Opasiak <k.opasiak@samsung.com>
Signed-off-by: Michal Nazarewicz <mina86@mina86.com>
Signed-off-by: Felipe Balbi <balbi@ti.com>
---
 include/uapi/linux/usb/functionfs.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/usb/functionfs.h b/include/uapi/linux/usb/functionfs.h
index 2a4b4a72a4f9..24b68c59dcf8 100644
--- a/include/uapi/linux/usb/functionfs.h
+++ b/include/uapi/linux/usb/functionfs.h
@@ -33,6 +33,13 @@ struct usb_endpoint_descriptor_no_audio {
 	__u8  bInterval;
 } __attribute__((packed));
 
+/* Legacy format, deprecated as of 3.14. */
+struct usb_functionfs_descs_head {
+	__le32 magic;
+	__le32 length;
+	__le32 fs_count;
+	__le32 hs_count;
+} __attribute__((packed, deprecated));
 
 /*
  * Descriptors format:
-- 
cgit v1.2.3


From b2373f255cacdc1ea4da25e75a5a78949ffd9d66 Mon Sep 17 00:00:00 2001
From: Anand Jain <Anand.Jain@oracle.com>
Date: Tue, 3 Jun 2014 11:36:03 +0800
Subject: btrfs: create sprout should rename fsid on the sysfs as well

Creating sprout will change the fsid of the mounted root.
do the same on the sysfs as well.

reproducer:
 mount /dev/sdb /btrfs (seed disk)
 btrfs dev add /dev/sdc /btrfs
 mount -o rw,remount /btrfs
 btrfs dev del /dev/sdb /btrfs
 mount /dev/sdb /btrfs

Error:
kobject_add_internal failed for fe350492-dc28-4051-a601-e017b17e6145 with -EEXIST, don't try to register things with the same name in the same directory.

Signed-off-by: Anand Jain <anand.jain@oracle.com>
Reviewed-by: David Sterba <dsterba@suse.cz>
Signed-off-by: Chris Mason <clm@fb.com>
---
 fs/btrfs/volumes.c         | 9 +++++++++
 include/uapi/linux/btrfs.h | 1 +
 2 files changed, 10 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 19b67e969b52..6ca0d9cc46f9 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -2154,6 +2154,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
 	mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
 
 	if (seeding_dev) {
+		char fsid_buf[BTRFS_UUID_UNPARSED_SIZE];
 		ret = init_first_rw_device(trans, root, device);
 		if (ret) {
 			btrfs_abort_transaction(trans, root, ret);
@@ -2164,6 +2165,14 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
 			btrfs_abort_transaction(trans, root, ret);
 			goto error_trans;
 		}
+
+		/* Sprouting would change fsid of the mounted root,
+		 * so rename the fsid on the sysfs
+		 */
+		snprintf(fsid_buf, BTRFS_UUID_UNPARSED_SIZE, "%pU",
+						root->fs_info->fsid);
+		if (kobject_rename(&root->fs_info->super_kobj, fsid_buf))
+			goto error_trans;
 	} else {
 		ret = btrfs_add_device(trans, root, device);
 		if (ret) {
diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h
index 6f9c38ce45c7..2f47824e7a36 100644
--- a/include/uapi/linux/btrfs.h
+++ b/include/uapi/linux/btrfs.h
@@ -38,6 +38,7 @@ struct btrfs_ioctl_vol_args {
 #define BTRFS_SUBVOL_QGROUP_INHERIT	(1ULL << 2)
 #define BTRFS_FSID_SIZE 16
 #define BTRFS_UUID_SIZE 16
+#define BTRFS_UUID_UNPARSED_SIZE	37
 
 #define BTRFS_QGROUP_INHERIT_SET_LIMITS	(1ULL << 0)
 
-- 
cgit v1.2.3


From d15156138dad40205c9fdd9abe85c9e1479ae272 Mon Sep 17 00:00:00 2001
From: Douglas Gilbert <dgilbert@interlog.com>
Date: Tue, 1 Jul 2014 10:48:05 -0600
Subject: block SG_IO: add SG_FLAG_Q_AT_HEAD flag

After the SG_IO ioctl was copied into the block layer and
later into the bsg driver, subtle differences emerged.

One difference is the way injected commands are queued through
the block layer (i.e. this is not SCSI device queueing nor SATA
NCQ). Summarizing:
  - SG_IO on block layer device: blk_exec*(at_head=false)
  - sg device SG_IO: at_head=true
  - bsg device SG_IO: at_head=true

Some time ago Boaz Harrosh introduced a sg v4 flag called
BSG_FLAG_Q_AT_TAIL to override the bsg driver default. A
recent patch titled: "sg: add SG_FLAG_Q_AT_TAIL flag"
allowed the sg driver default to be overridden. This patch
allows a SG_IO ioctl sent to a block layer device to have
its default overridden.

ChangeLog:
    - introduce SG_FLAG_Q_AT_HEAD flag in sg.h to cause
      commands that are injected via a block layer
      device SG_IO ioctl to set at_head=true
    - make comments clearer about queueing in sg.h since the
      header is used both by the sg device and block layer
      device implementations of the SG_IO ioctl.
    - introduce BSG_FLAG_Q_AT_HEAD in bsg.h for compatibility
      (it does nothing) and update comments.

Signed-off-by: Douglas Gilbert <dgilbert@interlog.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Mike Christie <michaelc@cs.wisc.edu>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/scsi_ioctl.c       |  5 ++++-
 include/scsi/sg.h        |  3 +++
 include/uapi/linux/bsg.h | 11 ++++++-----
 3 files changed, 13 insertions(+), 6 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c
index bda1497add4c..51bf5155ee75 100644
--- a/block/scsi_ioctl.c
+++ b/block/scsi_ioctl.c
@@ -290,6 +290,7 @@ static int sg_io(struct request_queue *q, struct gendisk *bd_disk,
 	unsigned long start_time;
 	ssize_t ret = 0;
 	int writing = 0;
+	int at_head = 0;
 	struct request *rq;
 	char sense[SCSI_SENSE_BUFFERSIZE];
 	struct bio *bio;
@@ -313,6 +314,8 @@ static int sg_io(struct request_queue *q, struct gendisk *bd_disk,
 		case SG_DXFER_FROM_DEV:
 			break;
 		}
+	if (hdr->flags & SG_FLAG_Q_AT_HEAD)
+		at_head = 1;
 
 	rq = blk_get_request(q, writing ? WRITE : READ, GFP_KERNEL);
 	if (!rq)
@@ -369,7 +372,7 @@ static int sg_io(struct request_queue *q, struct gendisk *bd_disk,
 	 * (if he doesn't check that is his problem).
 	 * N.B. a non-zero SCSI status is _not_ necessarily an error.
 	 */
-	blk_execute_rq(q, bd_disk, rq, 0);
+	blk_execute_rq(q, bd_disk, rq, at_head);
 
 	hdr->duration = jiffies_to_msecs(jiffies - start_time);
 
diff --git a/include/scsi/sg.h b/include/scsi/sg.h
index a9f3c6fc3f57..4734c15ab5d6 100644
--- a/include/scsi/sg.h
+++ b/include/scsi/sg.h
@@ -129,6 +129,9 @@ typedef struct sg_io_hdr
 #define SG_FLAG_MMAP_IO 4       /* request memory mapped IO */
 #define SG_FLAG_NO_DXFER 0x10000 /* no transfer of kernel buffers to/from */
 				/* user space (debug indirect IO) */
+/* defaults:: for sg driver: Q_AT_HEAD; for block layer: Q_AT_TAIL */
+#define SG_FLAG_Q_AT_TAIL 0x10
+#define SG_FLAG_Q_AT_HEAD 0x20
 
 /* following 'info' values are "or"-ed together */
 #define SG_INFO_OK_MASK 0x1
diff --git a/include/uapi/linux/bsg.h b/include/uapi/linux/bsg.h
index 7a12e1c0f371..02986cf8b6f1 100644
--- a/include/uapi/linux/bsg.h
+++ b/include/uapi/linux/bsg.h
@@ -10,12 +10,13 @@
 #define BSG_SUB_PROTOCOL_SCSI_TRANSPORT	2
 
 /*
- * For flags member below
- * sg.h sg_io_hdr also has bits defined for it's flags member. However
- * none of these bits are implemented/used by bsg. The bits below are
- * allocated to not conflict with sg.h ones anyway.
+ * For flag constants below:
+ * sg.h sg_io_hdr also has bits defined for it's flags member. These
+ * two flag values (0x10 and 0x20) have the same meaning in sg.h . For
+ * bsg the BSG_FLAG_Q_AT_HEAD flag is ignored since it is the deafult.
  */
-#define BSG_FLAG_Q_AT_TAIL 0x10 /* default, == 0 at this bit, is Q_AT_HEAD */
+#define BSG_FLAG_Q_AT_TAIL 0x10 /* default is Q_AT_HEAD */
+#define BSG_FLAG_Q_AT_HEAD 0x20
 
 struct sg_io_v4 {
 	__s32 guard;		/* [i] 'Q' to differentiate from v3 */
-- 
cgit v1.2.3


From cb553215d5d277d4838d7d6b7722e964bcf5ca1f Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@canonical.com>
Date: Thu, 26 Jun 2014 17:41:47 +0800
Subject: include/uapi/linux/virtio_blk.h: introduce feature of VIRTIO_BLK_F_MQ

Current virtio-blk spec only supports one virtual queue for transfering
data between VM and host, and inside VM all kinds of operations on
the virtual queue needs to hold one lock, so cause below problems:

	- bad scalability
	- bad throughput

This patch requests to introduce feature of VIRTIO_BLK_F_MQ
so that more than one virtual queues can be used to virtio-blk
device, then above problems can be solved or eased.

Signed-off-by: Ming Lei <ming.lei@canonical.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/uapi/linux/virtio_blk.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/virtio_blk.h b/include/uapi/linux/virtio_blk.h
index 6d8e61c48563..9ad67b267584 100644
--- a/include/uapi/linux/virtio_blk.h
+++ b/include/uapi/linux/virtio_blk.h
@@ -40,6 +40,7 @@
 #define VIRTIO_BLK_F_WCE	9	/* Writeback mode enabled after reset */
 #define VIRTIO_BLK_F_TOPOLOGY	10	/* Topology information is available */
 #define VIRTIO_BLK_F_CONFIG_WCE	11	/* Writeback mode available in config */
+#define VIRTIO_BLK_F_MQ		12	/* support more than one vq */
 
 #ifndef __KERNEL__
 /* Old (deprecated) name for VIRTIO_BLK_F_WCE. */
@@ -77,6 +78,10 @@ struct virtio_blk_config {
 
 	/* writeback mode (if VIRTIO_BLK_F_CONFIG_WCE) */
 	__u8 wce;
+	__u8 unused;
+
+	/* number of vqs, only available when VIRTIO_BLK_F_MQ is set */
+	__u16 num_queues;
 } __attribute__((packed));
 
 /*
-- 
cgit v1.2.3


From d93331965729850303f6111381c1a4a9e9b8ae5a Mon Sep 17 00:00:00 2001
From: Ben Greear <greearb@candelatech.com>
Date: Wed, 25 Jun 2014 14:44:53 -0700
Subject: ipv6: Allow accepting RA from local IP addresses.

This can be used in virtual networking applications, and
may have other uses as well.  The option is disabled by
default.

A specific use case is setting up virtual routers, bridges, and
hosts on a single OS without the use of network namespaces or
virtual machines.  With proper use of ip rules, routing tables,
veth interface pairs and/or other virtual interfaces,
and applications that can bind to interfaces and/or IP addresses,
it is possibly to create one or more virtual routers with multiple
hosts attached.  The host interfaces can act as IPv6 systems,
with radvd running on the ports in the virtual routers.  With the
option provided in this patch enabled, those hosts can now properly
obtain IPv6 addresses from the radvd.

Signed-off-by: Ben Greear <greearb@candelatech.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/ip-sysctl.txt | 12 ++++++++++++
 include/linux/ipv6.h                   |  1 +
 include/uapi/linux/ipv6.h              |  1 +
 include/uapi/linux/sysctl.h            |  1 +
 kernel/sysctl_binary.c                 |  1 +
 net/ipv6/addrconf.c                    | 10 ++++++++++
 net/ipv6/ndisc.c                       | 21 +++++++++++++--------
 7 files changed, 39 insertions(+), 8 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index ab42c95f9985..10e216c6e05e 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -1210,6 +1210,18 @@ accept_ra_defrtr - BOOLEAN
 	Functional default: enabled if accept_ra is enabled.
 			    disabled if accept_ra is disabled.
 
+accept_ra_from_local - BOOLEAN
+	Accept RA with source-address that is found on local machine
+        if the RA is otherwise proper and able to be accepted.
+        Default is to NOT accept these as it may be an un-intended
+        network loop.
+
+	Functional default:
+           enabled if accept_ra_from_local is enabled
+               on a specific interface.
+	   disabled if accept_ra_from_local is disabled
+               on a specific interface.
+
 accept_ra_pinfo - BOOLEAN
 	Learn Prefix Information in Router Advertisement.
 
diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index c811300b0b0c..b0f2452f1d58 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -39,6 +39,7 @@ struct ipv6_devconf {
 #endif
 	__s32		proxy_ndp;
 	__s32		accept_source_route;
+	__s32		accept_ra_from_local;
 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
 	__s32		optimistic_dad;
 #endif
diff --git a/include/uapi/linux/ipv6.h b/include/uapi/linux/ipv6.h
index 593b0e32d956..efa2666f4b8a 100644
--- a/include/uapi/linux/ipv6.h
+++ b/include/uapi/linux/ipv6.h
@@ -163,6 +163,7 @@ enum {
 	DEVCONF_MLDV1_UNSOLICITED_REPORT_INTERVAL,
 	DEVCONF_MLDV2_UNSOLICITED_REPORT_INTERVAL,
 	DEVCONF_SUPPRESS_FRAG_NDISC,
+	DEVCONF_ACCEPT_RA_FROM_LOCAL,
 	DEVCONF_MAX
 };
 
diff --git a/include/uapi/linux/sysctl.h b/include/uapi/linux/sysctl.h
index 6d6721341f49..43aaba1cc037 100644
--- a/include/uapi/linux/sysctl.h
+++ b/include/uapi/linux/sysctl.h
@@ -568,6 +568,7 @@ enum {
 	NET_IPV6_ACCEPT_RA_RT_INFO_MAX_PLEN=22,
 	NET_IPV6_PROXY_NDP=23,
 	NET_IPV6_ACCEPT_SOURCE_ROUTE=25,
+	NET_IPV6_ACCEPT_RA_FROM_LOCAL=26,
 	__NET_IPV6_MAX
 };
 
diff --git a/kernel/sysctl_binary.c b/kernel/sysctl_binary.c
index 653cbbd9e7ad..e4ba9a5a5ccb 100644
--- a/kernel/sysctl_binary.c
+++ b/kernel/sysctl_binary.c
@@ -522,6 +522,7 @@ static const struct bin_table bin_net_ipv6_conf_var_table[] = {
 	{ CTL_INT,	NET_IPV6_ACCEPT_RA_RT_INFO_MAX_PLEN,	"accept_ra_rt_info_max_plen" },
 	{ CTL_INT,	NET_IPV6_PROXY_NDP,			"proxy_ndp" },
 	{ CTL_INT,	NET_IPV6_ACCEPT_SOURCE_ROUTE,		"accept_source_route" },
+	{ CTL_INT,	NET_IPV6_ACCEPT_RA_FROM_LOCAL,		"accept_ra_from_local" },
 	{}
 };
 
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 5667b3003af9..358edd2272ac 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -186,6 +186,7 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = {
 	.max_desync_factor	= MAX_DESYNC_FACTOR,
 	.max_addresses		= IPV6_MAX_ADDRESSES,
 	.accept_ra_defrtr	= 1,
+	.accept_ra_from_local	= 0,
 	.accept_ra_pinfo	= 1,
 #ifdef CONFIG_IPV6_ROUTER_PREF
 	.accept_ra_rtr_pref	= 1,
@@ -222,6 +223,7 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
 	.max_desync_factor	= MAX_DESYNC_FACTOR,
 	.max_addresses		= IPV6_MAX_ADDRESSES,
 	.accept_ra_defrtr	= 1,
+	.accept_ra_from_local	= 0,
 	.accept_ra_pinfo	= 1,
 #ifdef CONFIG_IPV6_ROUTER_PREF
 	.accept_ra_rtr_pref	= 1,
@@ -4321,6 +4323,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
 	array[DEVCONF_FORCE_TLLAO] = cnf->force_tllao;
 	array[DEVCONF_NDISC_NOTIFY] = cnf->ndisc_notify;
 	array[DEVCONF_SUPPRESS_FRAG_NDISC] = cnf->suppress_frag_ndisc;
+	array[DEVCONF_ACCEPT_RA_FROM_LOCAL] = cnf->accept_ra_from_local;
 }
 
 static inline size_t inet6_ifla6_size(void)
@@ -5167,6 +5170,13 @@ static struct addrconf_sysctl_table
 			.mode		= 0644,
 			.proc_handler	= proc_dointvec
 		},
+		{
+			.procname	= "accept_ra_from_local",
+			.data		= &ipv6_devconf.accept_ra_from_local,
+			.maxlen		= sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= proc_dointvec,
+		},
 		{
 			/* sentinel */
 		}
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 736c11c6d266..a845e3d2057e 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -1148,11 +1148,15 @@ static void ndisc_router_discovery(struct sk_buff *skb)
 		goto skip_defrtr;
 	}
 
-	if (ipv6_chk_addr(dev_net(in6_dev->dev), &ipv6_hdr(skb)->saddr,
-			  NULL, 0)) {
+	/* Do not accept RA with source-addr found on local machine unless
+	 * accept_ra_from_local is set to true.
+	 */
+	if (!(in6_dev->cnf.accept_ra_from_local ||
+	      ipv6_chk_addr(dev_net(in6_dev->dev), &ipv6_hdr(skb)->saddr,
+			    NULL, 0))) {
 		ND_PRINTK(2, info,
-			  "RA: %s, chk_addr failed for dev: %s\n",
-			  __func__, skb->dev->name);
+			  "RA from local address detected on dev: %s: default router ignored\n",
+			  skb->dev->name);
 		goto skip_defrtr;
 	}
 
@@ -1290,11 +1294,12 @@ skip_linkparms:
 	}
 
 #ifdef CONFIG_IPV6_ROUTE_INFO
-	if (ipv6_chk_addr(dev_net(in6_dev->dev), &ipv6_hdr(skb)->saddr,
-			  NULL, 0)) {
+	if (!(in6_dev->cnf.accept_ra_from_local ||
+	      ipv6_chk_addr(dev_net(in6_dev->dev), &ipv6_hdr(skb)->saddr,
+			    NULL, 0))) {
 		ND_PRINTK(2, info,
-			  "RA: %s, chk-addr (route info) is false for dev: %s\n",
-			  __func__, skb->dev->name);
+			  "RA from local address detected on dev: %s: router info ignored.\n",
+			  skb->dev->name);
 		goto skip_routeinfo;
 	}
 
-- 
cgit v1.2.3


From cb1ce2ef387b01686469487edd45994872d52d73 Mon Sep 17 00:00:00 2001
From: Tom Herbert <therbert@google.com>
Date: Tue, 1 Jul 2014 21:33:10 -0700
Subject: ipv6: Implement automatic flow label generation on transmit

Automatically generate flow labels for IPv6 packets on transmit.
The flow label is computed based on skb_get_hash. The flow label will
only automatically be set when it is zero otherwise (i.e. flow label
manager hasn't set one). This supports the transmit side functionality
of RFC 6438.

Added an IPv6 sysctl auto_flowlabels to enable/disable this behavior
system wide, and added IPV6_AUTOFLOWLABEL socket option to enable this
functionality per socket.

By default, auto flowlabels are disabled to avoid possible conflicts
with flow label manager, however if this feature proves useful we
may want to enable it by default.

It should also be noted that FreeBSD has already implemented automatic
flow labels (including the sysctl and socket option). In FreeBSD,
automatic flow labels default to enabled.

Performance impact:

Running super_netperf with 200 flows for TCP_RR and UDP_RR for
IPv6. Note that in UDP case, __skb_get_hash will be called for
every packet with explains slight regression. In the TCP case
the hash is saved in the socket so there is no regression.

Automatic flow labels disabled:

  TCP_RR:
    86.53% CPU utilization
    127/195/322 90/95/99% latencies
    1.40498e+06 tps

  UDP_RR:
    90.70% CPU utilization
    118/168/243 90/95/99% latencies
    1.50309e+06 tps

Automatic flow labels enabled:

  TCP_RR:
    85.90% CPU utilization
    128/199/337 90/95/99% latencies
    1.40051e+06

  UDP_RR
    92.61% CPU utilization
    115/164/236 90/95/99% latencies
    1.4687e+06

Signed-off-by: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/ip-sysctl.txt |  9 +++++++++
 include/linux/ipv6.h                   |  3 ++-
 include/net/ipv6.h                     | 20 ++++++++++++++++++++
 include/net/netns/ipv6.h               |  1 +
 include/uapi/linux/in6.h               |  1 +
 net/ipv6/af_inet6.c                    |  1 +
 net/ipv6/ip6_gre.c                     |  7 +++++--
 net/ipv6/ip6_output.c                  |  7 +++++--
 net/ipv6/ip6_tunnel.c                  |  3 ++-
 net/ipv6/ipv6_sockglue.c               |  8 ++++++++
 net/ipv6/sysctl_net_ipv6.c             |  8 ++++++++
 11 files changed, 62 insertions(+), 6 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index 10e216c6e05e..f35bfe43bf7a 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -1132,6 +1132,15 @@ flowlabel_consistency - BOOLEAN
 	FALSE: disabled
 	Default: TRUE
 
+auto_flowlabels - BOOLEAN
+	Automatically generate flow labels based based on a flow hash
+	of the packet. This allows intermediate devices, such as routers,
+	to idenfify packet flows for mechanisms like Equal Cost Multipath
+	Routing (see RFC 6438).
+	TRUE: enabled
+	FALSE: disabled
+	Default: false
+
 anycast_src_echo_reply - BOOLEAN
 	Controls the use of anycast addresses as source addresses for ICMPv6
 	echo reply
diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index 5dc68c3ebcbd..ff560537dd61 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -199,7 +199,8 @@ struct ipv6_pinfo {
 						 * 010: prefer public address
 						 * 100: prefer care-of address
 						 */
-				dontfrag:1;
+				dontfrag:1,
+				autoflowlabel:1;
 	__u8			min_hopcount;
 	__u8			tclass;
 	__be32			rcv_flowinfo;
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 2aa86e1135a1..4308f2ada8b3 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -699,6 +699,26 @@ static inline void ip6_set_txhash(struct sock *sk)
 	sk->sk_txhash = flow_hash_from_keys(&keys);
 }
 
+static inline __be32 ip6_make_flowlabel(struct net *net, struct sk_buff *skb,
+					__be32 flowlabel, bool autolabel)
+{
+	if (!flowlabel && (autolabel || net->ipv6.sysctl.auto_flowlabels)) {
+		__be32 hash;
+
+		hash = skb_get_hash(skb);
+
+		/* Since this is being sent on the wire obfuscate hash a bit
+		 * to minimize possbility that any useful information to an
+		 * attacker is leaked. Only lower 20 bits are relevant.
+		 */
+		hash ^= hash >> 12;
+
+		flowlabel = hash & IPV6_FLOWLABEL_MASK;
+	}
+
+	return flowlabel;
+}
+
 /*
  *	Header manipulation
  */
diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h
index 19d3446e59d2..eade27adecf3 100644
--- a/include/net/netns/ipv6.h
+++ b/include/net/netns/ipv6.h
@@ -28,6 +28,7 @@ struct netns_sysctl_ipv6 {
 	int ip6_rt_mtu_expires;
 	int ip6_rt_min_advmss;
 	int flowlabel_consistency;
+	int auto_flowlabels;
 	int icmpv6_time;
 	int anycast_src_echo_reply;
 	int fwmark_reflect;
diff --git a/include/uapi/linux/in6.h b/include/uapi/linux/in6.h
index 0d8e0f0342dc..22b7a69619d8 100644
--- a/include/uapi/linux/in6.h
+++ b/include/uapi/linux/in6.h
@@ -233,6 +233,7 @@ struct in6_flowlabel_req {
 #if 0	/* not yet */
 #define IPV6_USE_MIN_MTU	63
 #endif
+#define IPV6_AUTOFLOWLABEL	64
 
 /*
  * Netfilter (1)
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index a426cd7099bb..2daa3a133e49 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -765,6 +765,7 @@ static int __net_init inet6_net_init(struct net *net)
 	net->ipv6.sysctl.bindv6only = 0;
 	net->ipv6.sysctl.icmpv6_time = 1*HZ;
 	net->ipv6.sysctl.flowlabel_consistency = 1;
+	net->ipv6.sysctl.auto_flowlabels = 0;
 	atomic_set(&net->ipv6.rt_genid, 0);
 
 	err = ipv6_init_mibs(net);
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 3873181ed856..365b2b6f3942 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -723,7 +723,8 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb,
 	 *	Push down and install the IP header.
 	 */
 	ipv6h = ipv6_hdr(skb);
-	ip6_flow_hdr(ipv6h, INET_ECN_encapsulate(0, dsfield), fl6->flowlabel);
+	ip6_flow_hdr(ipv6h, INET_ECN_encapsulate(0, dsfield),
+		     ip6_make_flowlabel(net, skb, fl6->flowlabel, false));
 	ipv6h->hop_limit = tunnel->parms.hop_limit;
 	ipv6h->nexthdr = proto;
 	ipv6h->saddr = fl6->saddr;
@@ -1174,7 +1175,9 @@ static int ip6gre_header(struct sk_buff *skb, struct net_device *dev,
 	struct ipv6hdr *ipv6h = (struct ipv6hdr *)skb_push(skb, t->hlen);
 	__be16 *p = (__be16 *)(ipv6h+1);
 
-	ip6_flow_hdr(ipv6h, 0, t->fl.u.ip6.flowlabel);
+	ip6_flow_hdr(ipv6h, 0,
+		     ip6_make_flowlabel(dev_net(dev), skb,
+					t->fl.u.ip6.flowlabel, false));
 	ipv6h->hop_limit = t->parms.hop_limit;
 	ipv6h->nexthdr = NEXTHDR_GRE;
 	ipv6h->saddr = t->parms.laddr;
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index cb9df0eb4023..fa83bdd4c3dd 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -205,7 +205,8 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
 	if (hlimit < 0)
 		hlimit = ip6_dst_hoplimit(dst);
 
-	ip6_flow_hdr(hdr, tclass, fl6->flowlabel);
+	ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
+						     np->autoflowlabel));
 
 	hdr->payload_len = htons(seg_len);
 	hdr->nexthdr = proto;
@@ -1569,7 +1570,9 @@ int ip6_push_pending_frames(struct sock *sk)
 	skb_reset_network_header(skb);
 	hdr = ipv6_hdr(skb);
 
-	ip6_flow_hdr(hdr, np->cork.tclass, fl6->flowlabel);
+	ip6_flow_hdr(hdr, np->cork.tclass,
+		     ip6_make_flowlabel(net, skb, fl6->flowlabel,
+					np->autoflowlabel));
 	hdr->hop_limit = np->cork.hop_limit;
 	hdr->nexthdr = proto;
 	hdr->saddr = fl6->saddr;
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index afa082458360..51a1eb185ea7 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -1046,7 +1046,8 @@ static int ip6_tnl_xmit2(struct sk_buff *skb,
 	skb_push(skb, sizeof(struct ipv6hdr));
 	skb_reset_network_header(skb);
 	ipv6h = ipv6_hdr(skb);
-	ip6_flow_hdr(ipv6h, INET_ECN_encapsulate(0, dsfield), fl6->flowlabel);
+	ip6_flow_hdr(ipv6h, INET_ECN_encapsulate(0, dsfield),
+		     ip6_make_flowlabel(net, skb, fl6->flowlabel, false));
 	ipv6h->hop_limit = t->parms.hop_limit;
 	ipv6h->nexthdr = proto;
 	ipv6h->saddr = fl6->saddr;
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index cc34f65179e4..b50b9e54cf53 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -834,6 +834,10 @@ pref_skip_coa:
 		np->dontfrag = valbool;
 		retv = 0;
 		break;
+	case IPV6_AUTOFLOWLABEL:
+		np->autoflowlabel = valbool;
+		retv = 0;
+		break;
 	}
 
 	release_sock(sk);
@@ -1273,6 +1277,10 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
 		val = np->dontfrag;
 		break;
 
+	case IPV6_AUTOFLOWLABEL:
+		val = np->autoflowlabel;
+		break;
+
 	default:
 		return -ENOPROTOOPT;
 	}
diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c
index 058f3eca2e53..5bf7b61f8ae8 100644
--- a/net/ipv6/sysctl_net_ipv6.c
+++ b/net/ipv6/sysctl_net_ipv6.c
@@ -38,6 +38,13 @@ static struct ctl_table ipv6_table_template[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec
 	},
+	{
+		.procname	= "auto_flowlabels",
+		.data		= &init_net.ipv6.sysctl.auto_flowlabels,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec
+	},
 	{
 		.procname	= "fwmark_reflect",
 		.data		= &init_net.ipv6.sysctl.fwmark_reflect,
@@ -74,6 +81,7 @@ static int __net_init ipv6_sysctl_net_init(struct net *net)
 	ipv6_table[0].data = &net->ipv6.sysctl.bindv6only;
 	ipv6_table[1].data = &net->ipv6.sysctl.anycast_src_echo_reply;
 	ipv6_table[2].data = &net->ipv6.sysctl.flowlabel_consistency;
+	ipv6_table[3].data = &net->ipv6.sysctl.auto_flowlabels;
 
 	ipv6_route_table = ipv6_route_sysctl_init(net);
 	if (!ipv6_route_table)
-- 
cgit v1.2.3


From c1f732ad767e37bd1d41043cbdefc0874b4d05e5 Mon Sep 17 00:00:00 2001
From: Kleber Sacilotto de Souza <klebers@linux.vnet.ibm.com>
Date: Wed, 4 Jun 2014 10:57:50 -0300
Subject: GenWQE: Add sysfs interface for bitstream reload

This patch adds an interface on sysfs for userspace to request a card
bitstream reload. It sets the appropriate register and try to perform a
fundamental reset on the PCIe slot for the card to reload the bitstream
from the chosen partition.

Signed-off-by: Kleber Sacilotto de Souza <klebers@linux.vnet.ibm.com>
Acked-by: Frank Haverkamp <haver@linux.vnet.ibm.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/ABI/testing/sysfs-driver-genwqe |  9 +++
 drivers/misc/genwqe/card_base.c               | 90 +++++++++++++++++++++++++++
 drivers/misc/genwqe/card_sysfs.c              | 25 ++++++++
 include/uapi/linux/genwqe/genwqe_card.h       |  1 +
 4 files changed, 125 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/Documentation/ABI/testing/sysfs-driver-genwqe b/Documentation/ABI/testing/sysfs-driver-genwqe
index 1870737a1f5e..64ac6d567c4b 100644
--- a/Documentation/ABI/testing/sysfs-driver-genwqe
+++ b/Documentation/ABI/testing/sysfs-driver-genwqe
@@ -25,6 +25,15 @@ Date:           Oct 2013
 Contact:        haver@linux.vnet.ibm.com
 Description:    Interface to set the next bitstream to be used.
 
+What:           /sys/class/genwqe/genwqe<n>_card/reload_bitstream
+Date:           May 2014
+Contact:        klebers@linux.vnet.ibm.com
+Description:    Interface to trigger a PCIe card reset to reload the bitstream.
+                  sudo sh -c 'echo 1 > \
+                    /sys/class/genwqe/genwqe0_card/reload_bitstream'
+                If successfully, the card will come back with the bitstream set
+                on 'next_bitstream'.
+
 What:           /sys/class/genwqe/genwqe<n>_card/tempsens
 Date:           Oct 2013
 Contact:        haver@linux.vnet.ibm.com
diff --git a/drivers/misc/genwqe/card_base.c b/drivers/misc/genwqe/card_base.c
index 74d51c9bb858..e6cc3e1e7326 100644
--- a/drivers/misc/genwqe/card_base.c
+++ b/drivers/misc/genwqe/card_base.c
@@ -760,6 +760,89 @@ static u64 genwqe_fir_checking(struct genwqe_dev *cd)
 	return IO_ILLEGAL_VALUE;
 }
 
+/**
+ * genwqe_pci_fundamental_reset() - trigger a PCIe fundamental reset on the slot
+ *
+ * Note: pci_set_pcie_reset_state() is not implemented on all archs, so this
+ * reset method will not work in all cases.
+ *
+ * Return: 0 on success or error code from pci_set_pcie_reset_state()
+ */
+static int genwqe_pci_fundamental_reset(struct pci_dev *pci_dev)
+{
+	int rc;
+
+	/*
+	 * lock pci config space access from userspace,
+	 * save state and issue PCIe fundamental reset
+	 */
+	pci_cfg_access_lock(pci_dev);
+	pci_save_state(pci_dev);
+	rc = pci_set_pcie_reset_state(pci_dev, pcie_warm_reset);
+	if (!rc) {
+		/* keep PCIe reset asserted for 250ms */
+		msleep(250);
+		pci_set_pcie_reset_state(pci_dev, pcie_deassert_reset);
+		/* Wait for 2s to reload flash and train the link */
+		msleep(2000);
+	}
+	pci_restore_state(pci_dev);
+	pci_cfg_access_unlock(pci_dev);
+	return rc;
+}
+
+/*
+ * genwqe_reload_bistream() - reload card bitstream
+ *
+ * Set the appropriate register and call fundamental reset to reaload the card
+ * bitstream.
+ *
+ * Return: 0 on success, error code otherwise
+ */
+static int genwqe_reload_bistream(struct genwqe_dev *cd)
+{
+	struct pci_dev *pci_dev = cd->pci_dev;
+	int rc;
+
+	dev_info(&pci_dev->dev,
+		 "[%s] resetting card for bitstream reload\n",
+		 __func__);
+
+	genwqe_stop(cd);
+
+	/*
+	 * Cause a CPLD reprogram with the 'next_bitstream'
+	 * partition on PCIe hot or fundamental reset
+	 */
+	__genwqe_writeq(cd, IO_SLC_CFGREG_SOFTRESET,
+			(cd->softreset & 0xcull) | 0x70ull);
+
+	rc = genwqe_pci_fundamental_reset(pci_dev);
+	if (rc) {
+		/*
+		 * A fundamental reset failure can be caused
+		 * by lack of support on the arch, so we just
+		 * log the error and try to start the card
+		 * again.
+		 */
+		dev_err(&pci_dev->dev,
+			"[%s] err: failed to reset card for bitstream reload\n",
+			__func__);
+	}
+
+	rc = genwqe_start(cd);
+	if (rc) {
+		dev_err(&pci_dev->dev,
+			"[%s] err: cannot start card services! (err=%d)\n",
+			__func__, rc);
+		return rc;
+	}
+	dev_info(&pci_dev->dev,
+		 "[%s] card reloaded\n", __func__);
+	return 0;
+}
+
+
 /**
  * genwqe_health_thread() - Health checking thread
  *
@@ -846,6 +929,13 @@ static int genwqe_health_thread(void *data)
 			}
 		}
 
+		if (cd->card_state == GENWQE_CARD_RELOAD_BITSTREAM) {
+			/* Userspace requested card bitstream reload */
+			rc = genwqe_reload_bistream(cd);
+			if (rc)
+				goto fatal_error;
+		}
+
 		cd->last_gfir = gfir;
 		cond_resched();
 	}
diff --git a/drivers/misc/genwqe/card_sysfs.c b/drivers/misc/genwqe/card_sysfs.c
index a72a99266c3c..7232e40a3ad9 100644
--- a/drivers/misc/genwqe/card_sysfs.c
+++ b/drivers/misc/genwqe/card_sysfs.c
@@ -223,6 +223,30 @@ static ssize_t next_bitstream_store(struct device *dev,
 }
 static DEVICE_ATTR_RW(next_bitstream);
 
+static ssize_t reload_bitstream_store(struct device *dev,
+				struct device_attribute *attr,
+				const char *buf, size_t count)
+{
+	int reload;
+	struct genwqe_dev *cd = dev_get_drvdata(dev);
+
+	if (kstrtoint(buf, 0, &reload) < 0)
+		return -EINVAL;
+
+	if (reload == 0x1) {
+		if (cd->card_state == GENWQE_CARD_UNUSED ||
+		    cd->card_state == GENWQE_CARD_USED)
+			cd->card_state = GENWQE_CARD_RELOAD_BITSTREAM;
+		else
+			return -EIO;
+	} else {
+		return -EINVAL;
+	}
+
+	return count;
+}
+static DEVICE_ATTR_WO(reload_bitstream);
+
 /*
  * Create device_attribute structures / params: name, mode, show, store
  * additional flag if valid in VF
@@ -239,6 +263,7 @@ static struct attribute *genwqe_attributes[] = {
 	&dev_attr_status.attr,
 	&dev_attr_freerunning_timer.attr,
 	&dev_attr_queue_working_time.attr,
+	&dev_attr_reload_bitstream.attr,
 	NULL,
 };
 
diff --git a/include/uapi/linux/genwqe/genwqe_card.h b/include/uapi/linux/genwqe/genwqe_card.h
index 795e957bb840..4fc065f29255 100644
--- a/include/uapi/linux/genwqe/genwqe_card.h
+++ b/include/uapi/linux/genwqe/genwqe_card.h
@@ -328,6 +328,7 @@ enum genwqe_card_state {
 	GENWQE_CARD_UNUSED = 0,
 	GENWQE_CARD_USED = 1,
 	GENWQE_CARD_FATAL_ERROR = 2,
+	GENWQE_CARD_RELOAD_BITSTREAM = 3,
 	GENWQE_CARD_STATE_MAX,
 };
 
-- 
cgit v1.2.3


From d5d222605503dd39513b3baeb9475ddf316511d7 Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Sat, 21 Jun 2014 08:08:11 -0700
Subject: i8k: uapi: Introduce define for new highest fan speed

Some Dell laptops support fan speeds of {0, 1, 2, 3} instead of {0, 1, 2}.
Add a define for it.

Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Cc: Andreas Mohr <andi@lisas.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/uapi/linux/i8k.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/i8k.h b/include/uapi/linux/i8k.h
index 1c45ba505115..133d02f03c25 100644
--- a/include/uapi/linux/i8k.h
+++ b/include/uapi/linux/i8k.h
@@ -34,7 +34,8 @@
 #define I8K_FAN_OFF		0
 #define I8K_FAN_LOW		1
 #define I8K_FAN_HIGH		2
-#define I8K_FAN_MAX		I8K_FAN_HIGH
+#define I8K_FAN_TURBO		3
+#define I8K_FAN_MAX		I8K_FAN_TURBO
 
 #define I8K_VOL_UP		1
 #define I8K_VOL_DOWN		2
-- 
cgit v1.2.3


From 875cbf3e4614cfdcc7f65033e25292aec80f09c0 Mon Sep 17 00:00:00 2001
From: AKASHI Takahiro <takahiro.akashi@linaro.org>
Date: Fri, 4 Jul 2014 08:28:30 +0100
Subject: arm64: Add audit support

On AArch64, audit is supported through generic lib/audit.c and
compat_audit.c, and so this patch adds arch specific definitions required.

Acked-by Will Deacon <will.deacon@arm.com>
Acked-by: Richard Guy Briggs <rgb@redhat.com>
Signed-off-by: AKASHI Takahiro <takahiro.akashi@linaro.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/Kconfig               |  2 ++
 arch/arm64/include/asm/syscall.h | 14 ++++++++++++++
 include/uapi/linux/audit.h       |  1 +
 3 files changed, 17 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 1cb806529393..ce6e733e0c05 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -10,6 +10,7 @@ config ARM64
 	select ARM_AMBA
 	select ARM_ARCH_TIMER
 	select ARM_GIC
+	select AUDIT_ARCH_COMPAT_GENERIC
 	select BUILDTIME_EXTABLE_SORT
 	select CLONE_BACKWARDS
 	select COMMON_CLK
@@ -28,6 +29,7 @@ config ARM64
 	select GENERIC_STRNLEN_USER
 	select GENERIC_TIME_VSYSCALL
 	select HARDIRQS_SW_RESEND
+	select HAVE_ARCH_AUDITSYSCALL
 	select HAVE_ARCH_JUMP_LABEL
 	select HAVE_ARCH_KGDB
 	select HAVE_ARCH_TRACEHOOK
diff --git a/arch/arm64/include/asm/syscall.h b/arch/arm64/include/asm/syscall.h
index 383771eb0b87..709a574468f0 100644
--- a/arch/arm64/include/asm/syscall.h
+++ b/arch/arm64/include/asm/syscall.h
@@ -16,6 +16,8 @@
 #ifndef __ASM_SYSCALL_H
 #define __ASM_SYSCALL_H
 
+#include <uapi/linux/audit.h>
+#include <linux/compat.h>
 #include <linux/err.h>
 
 extern const void *sys_call_table[];
@@ -105,4 +107,16 @@ static inline void syscall_set_arguments(struct task_struct *task,
 	memcpy(&regs->regs[i], args, n * sizeof(args[0]));
 }
 
+/*
+ * We don't care about endianness (__AUDIT_ARCH_LE bit) here because
+ * AArch64 has the same system calls both on little- and big- endian.
+ */
+static inline int syscall_get_arch(void)
+{
+	if (is_compat_task())
+		return AUDIT_ARCH_ARM;
+
+	return AUDIT_ARCH_AARCH64;
+}
+
 #endif	/* __ASM_SYSCALL_H */
diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h
index cf6714752b69..3b9ff33e1768 100644
--- a/include/uapi/linux/audit.h
+++ b/include/uapi/linux/audit.h
@@ -342,6 +342,7 @@ enum {
 #define __AUDIT_ARCH_64BIT 0x80000000
 #define __AUDIT_ARCH_LE	   0x40000000
 
+#define AUDIT_ARCH_AARCH64	(EM_AARCH64|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE)
 #define AUDIT_ARCH_ALPHA	(EM_ALPHA|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE)
 #define AUDIT_ARCH_ARM		(EM_ARM|__AUDIT_ARCH_LE)
 #define AUDIT_ARCH_ARMEB	(EM_ARM)
-- 
cgit v1.2.3


From 0b4820d6d8b6448bc9f7fac1bb1a801a53b425e1 Mon Sep 17 00:00:00 2001
From: David Hildenbrand <dahi@linux.vnet.ibm.com>
Date: Mon, 12 May 2014 16:05:13 +0200
Subject: KVM: prepare for KVM_(S|G)ET_MP_STATE on other architectures

Highlight the aspects of the ioctls that are actually specific to x86
and ia64. As defined restrictions (irqchip) and mp states may not apply
to other architectures, these parts are flagged to belong to x86 and ia64.

In preparation for the use of KVM_(S|G)ET_MP_STATE by s390.
Fix a spelling error (KVM_SET_MP_STATE vs. KVM_SET_MPSTATE) on the way.

Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Acked-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
---
 Documentation/virtual/kvm/api.txt | 21 ++++++++++++---------
 include/uapi/linux/kvm.h          |  3 ++-
 2 files changed, 14 insertions(+), 10 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 0fe36497642c..904c61cdd311 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -988,18 +988,20 @@ uniprocessor guests).
 
 Possible values are:
 
- - KVM_MP_STATE_RUNNABLE:        the vcpu is currently running
+ - KVM_MP_STATE_RUNNABLE:        the vcpu is currently running [x86, ia64]
  - KVM_MP_STATE_UNINITIALIZED:   the vcpu is an application processor (AP)
-                                 which has not yet received an INIT signal
+                                 which has not yet received an INIT signal [x86,
+                                 ia64]
  - KVM_MP_STATE_INIT_RECEIVED:   the vcpu has received an INIT signal, and is
-                                 now ready for a SIPI
+                                 now ready for a SIPI [x86, ia64]
  - KVM_MP_STATE_HALTED:          the vcpu has executed a HLT instruction and
-                                 is waiting for an interrupt
+                                 is waiting for an interrupt [x86, ia64]
  - KVM_MP_STATE_SIPI_RECEIVED:   the vcpu has just received a SIPI (vector
-                                 accessible via KVM_GET_VCPU_EVENTS)
+                                 accessible via KVM_GET_VCPU_EVENTS) [x86, ia64]
 
-This ioctl is only useful after KVM_CREATE_IRQCHIP.  Without an in-kernel
-irqchip, the multiprocessing state must be maintained by userspace.
+On x86 and ia64, this ioctl is only useful after KVM_CREATE_IRQCHIP. Without an
+in-kernel irqchip, the multiprocessing state must be maintained by userspace on
+these architectures.
 
 
 4.39 KVM_SET_MP_STATE
@@ -1013,8 +1015,9 @@ Returns: 0 on success; -1 on error
 Sets the vcpu's current "multiprocessing state"; see KVM_GET_MP_STATE for
 arguments.
 
-This ioctl is only useful after KVM_CREATE_IRQCHIP.  Without an in-kernel
-irqchip, the multiprocessing state must be maintained by userspace.
+On x86 and ia64, this ioctl is only useful after KVM_CREATE_IRQCHIP. Without an
+in-kernel irqchip, the multiprocessing state must be maintained by userspace on
+these architectures.
 
 
 4.40 KVM_SET_IDENTITY_MAP_ADDR
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index e11d8f170a62..37d4ec6f14d8 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -399,8 +399,9 @@ struct kvm_vapic_addr {
 	__u64 vapic_addr;
 };
 
-/* for KVM_SET_MPSTATE */
+/* for KVM_SET_MP_STATE */
 
+/* not all states are valid on all architectures */
 #define KVM_MP_STATE_RUNNABLE          0
 #define KVM_MP_STATE_UNINITIALIZED     1
 #define KVM_MP_STATE_INIT_RECEIVED     2
-- 
cgit v1.2.3


From 6352e4d2dd9a349024a41356148eced553e1dce4 Mon Sep 17 00:00:00 2001
From: David Hildenbrand <dahi@linux.vnet.ibm.com>
Date: Thu, 10 Apr 2014 17:35:00 +0200
Subject: KVM: s390: implement KVM_(S|G)ET_MP_STATE for user space state
 control

This patch
- adds s390 specific MP states to linux headers and documents them
- implements the KVM_{SET,GET}_MP_STATE ioctls
- enables KVM_CAP_MP_STATE
- allows user space to control the VCPU state on s390.

If user space sets the VCPU state using the ioctl KVM_SET_MP_STATE, we can disable
manual changing of the VCPU state and trust user space to do the right thing.

Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Acked-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
---
 Documentation/virtual/kvm/api.txt | 10 ++++++++--
 arch/s390/include/asm/kvm_host.h  |  1 +
 arch/s390/kvm/diag.c              |  3 ++-
 arch/s390/kvm/intercept.c         |  3 ++-
 arch/s390/kvm/kvm-s390.c          | 37 +++++++++++++++++++++++++++++++++----
 arch/s390/kvm/kvm-s390.h          |  6 ++++++
 include/uapi/linux/kvm.h          |  4 ++++
 7 files changed, 56 insertions(+), 8 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 904c61cdd311..a41465bd6a5c 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -974,7 +974,7 @@ for vm-wide capabilities.
 4.38 KVM_GET_MP_STATE
 
 Capability: KVM_CAP_MP_STATE
-Architectures: x86, ia64
+Architectures: x86, ia64, s390
 Type: vcpu ioctl
 Parameters: struct kvm_mp_state (out)
 Returns: 0 on success; -1 on error
@@ -998,6 +998,12 @@ Possible values are:
                                  is waiting for an interrupt [x86, ia64]
  - KVM_MP_STATE_SIPI_RECEIVED:   the vcpu has just received a SIPI (vector
                                  accessible via KVM_GET_VCPU_EVENTS) [x86, ia64]
+ - KVM_MP_STATE_STOPPED:         the vcpu is stopped [s390]
+ - KVM_MP_STATE_CHECK_STOP:      the vcpu is in a special error state [s390]
+ - KVM_MP_STATE_OPERATING:       the vcpu is operating (running or halted)
+                                 [s390]
+ - KVM_MP_STATE_LOAD:            the vcpu is in a special load/startup state
+                                 [s390]
 
 On x86 and ia64, this ioctl is only useful after KVM_CREATE_IRQCHIP. Without an
 in-kernel irqchip, the multiprocessing state must be maintained by userspace on
@@ -1007,7 +1013,7 @@ these architectures.
 4.39 KVM_SET_MP_STATE
 
 Capability: KVM_CAP_MP_STATE
-Architectures: x86, ia64
+Architectures: x86, ia64, s390
 Type: vcpu ioctl
 Parameters: struct kvm_mp_state (in)
 Returns: 0 on success; -1 on error
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 4181d7baabba..c2ba0208a0e1 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -418,6 +418,7 @@ struct kvm_arch{
 	int css_support;
 	int use_irqchip;
 	int use_cmma;
+	int user_cpu_state_ctrl;
 	struct s390_io_adapter *adapters[MAX_S390_IO_ADAPTERS];
 	wait_queue_head_t ipte_wq;
 	spinlock_t start_stop_lock;
diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c
index 0161675878a2..59bd8f991b98 100644
--- a/arch/s390/kvm/diag.c
+++ b/arch/s390/kvm/diag.c
@@ -176,7 +176,8 @@ static int __diag_ipl_functions(struct kvm_vcpu *vcpu)
 		return -EOPNOTSUPP;
 	}
 
-	kvm_s390_vcpu_stop(vcpu);
+	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
+		kvm_s390_vcpu_stop(vcpu);
 	vcpu->run->s390_reset_flags |= KVM_S390_RESET_SUBSYSTEM;
 	vcpu->run->s390_reset_flags |= KVM_S390_RESET_IPL;
 	vcpu->run->s390_reset_flags |= KVM_S390_RESET_CPU_INIT;
diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c
index ac6b32585a36..eaf46291d361 100644
--- a/arch/s390/kvm/intercept.c
+++ b/arch/s390/kvm/intercept.c
@@ -73,7 +73,8 @@ static int handle_stop(struct kvm_vcpu *vcpu)
 			return rc;
 	}
 
-	kvm_s390_vcpu_stop(vcpu);
+	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
+		kvm_s390_vcpu_stop(vcpu);
 	return -EOPNOTSUPP;
 }
 
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 342895350825..fdf88f7a539c 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -167,6 +167,7 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_DEVICE_CTRL:
 	case KVM_CAP_ENABLE_CAP_VM:
 	case KVM_CAP_VM_ATTRIBUTES:
+	case KVM_CAP_MP_STATE:
 		r = 1;
 		break;
 	case KVM_CAP_NR_VCPUS:
@@ -595,7 +596,8 @@ static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
 	vcpu->arch.sie_block->pp = 0;
 	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
 	kvm_clear_async_pf_completion_queue(vcpu);
-	kvm_s390_vcpu_stop(vcpu);
+	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
+		kvm_s390_vcpu_stop(vcpu);
 	kvm_s390_clear_local_irqs(vcpu);
 }
 
@@ -980,13 +982,34 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
 				    struct kvm_mp_state *mp_state)
 {
-	return -EINVAL; /* not implemented yet */
+	/* CHECK_STOP and LOAD are not supported yet */
+	return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
+				       KVM_MP_STATE_OPERATING;
 }
 
 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
 				    struct kvm_mp_state *mp_state)
 {
-	return -EINVAL; /* not implemented yet */
+	int rc = 0;
+
+	/* user space knows about this interface - let it control the state */
+	vcpu->kvm->arch.user_cpu_state_ctrl = 1;
+
+	switch (mp_state->mp_state) {
+	case KVM_MP_STATE_STOPPED:
+		kvm_s390_vcpu_stop(vcpu);
+		break;
+	case KVM_MP_STATE_OPERATING:
+		kvm_s390_vcpu_start(vcpu);
+		break;
+	case KVM_MP_STATE_LOAD:
+	case KVM_MP_STATE_CHECK_STOP:
+		/* fall through - CHECK_STOP and LOAD are not supported yet */
+	default:
+		rc = -ENXIO;
+	}
+
+	return rc;
 }
 
 bool kvm_s390_cmma_enabled(struct kvm *kvm)
@@ -1284,7 +1307,13 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	if (vcpu->sigset_active)
 		sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
 
-	kvm_s390_vcpu_start(vcpu);
+	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
+		kvm_s390_vcpu_start(vcpu);
+	} else if (is_vcpu_stopped(vcpu)) {
+		pr_err_ratelimited("kvm-s390: can't run stopped vcpu %d\n",
+				   vcpu->vcpu_id);
+		return -EINVAL;
+	}
 
 	switch (kvm_run->exit_reason) {
 	case KVM_EXIT_S390_SIEIC:
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index 77ed846342d4..33a0e4bed2a5 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -129,6 +129,12 @@ static inline void kvm_s390_set_psw_cc(struct kvm_vcpu *vcpu, unsigned long cc)
 	vcpu->arch.sie_block->gpsw.mask |= cc << 44;
 }
 
+/* are cpu states controlled by user space */
+static inline int kvm_s390_user_cpu_state_ctrl(struct kvm *kvm)
+{
+	return kvm->arch.user_cpu_state_ctrl != 0;
+}
+
 int kvm_s390_handle_wait(struct kvm_vcpu *vcpu);
 enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer);
 void kvm_s390_tasklet(unsigned long parm);
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 37d4ec6f14d8..9b744af871d7 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -407,6 +407,10 @@ struct kvm_vapic_addr {
 #define KVM_MP_STATE_INIT_RECEIVED     2
 #define KVM_MP_STATE_HALTED            3
 #define KVM_MP_STATE_SIPI_RECEIVED     4
+#define KVM_MP_STATE_STOPPED           5
+#define KVM_MP_STATE_CHECK_STOP        6
+#define KVM_MP_STATE_OPERATING         7
+#define KVM_MP_STATE_LOAD              8
 
 struct kvm_mp_state {
 	__u32 mp_state;
-- 
cgit v1.2.3


From f0175ab51993d2dc2728e7b22a16ffb0c8f4cfa0 Mon Sep 17 00:00:00 2001
From: Andrzej Pietrasiewicz <andrzej.p@samsung.com>
Date: Wed, 9 Jul 2014 12:20:08 +0200
Subject: usb: gadget: f_fs: OS descriptors support

Add support for OS descriptors. The new format of descriptors is used,
because the "flags" field is required for extensions. os_count gives
the number of OSDesc[] elements.
The format of descriptors is given in include/uapi/linux/usb/functionfs.h.

For extended properties descriptor the usb_ext_prop_desc structure covers
only a part of a descriptor, because the wPropertyNameLength is unknown
up front.

Signed-off-by: Andrzej Pietrasiewicz <andrzej.p@samsung.com>
Acked-by: Michal Nazarewicz <mina86@mina86.com>
Signed-off-by: Felipe Balbi <balbi@ti.com>
---
 drivers/usb/gadget/f_fs.c           | 341 +++++++++++++++++++++++++++++++++++-
 drivers/usb/gadget/u_fs.h           |   7 +
 include/uapi/linux/usb/functionfs.h |  81 ++++++++-
 3 files changed, 419 insertions(+), 10 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/drivers/usb/gadget/f_fs.c b/drivers/usb/gadget/f_fs.c
index e1b2ddd7964a..fe45060e0a7a 100644
--- a/drivers/usb/gadget/f_fs.c
+++ b/drivers/usb/gadget/f_fs.c
@@ -34,6 +34,7 @@
 
 #include "u_fs.h"
 #include "u_f.h"
+#include "u_os_desc.h"
 #include "configfs.h"
 
 #define FUNCTIONFS_MAGIC	0xa647361 /* Chosen by a honest dice roll ;) */
@@ -1644,11 +1645,19 @@ enum ffs_entity_type {
 	FFS_DESCRIPTOR, FFS_INTERFACE, FFS_STRING, FFS_ENDPOINT
 };
 
+enum ffs_os_desc_type {
+	FFS_OS_DESC, FFS_OS_DESC_EXT_COMPAT, FFS_OS_DESC_EXT_PROP
+};
+
 typedef int (*ffs_entity_callback)(enum ffs_entity_type entity,
 				   u8 *valuep,
 				   struct usb_descriptor_header *desc,
 				   void *priv);
 
+typedef int (*ffs_os_desc_callback)(enum ffs_os_desc_type entity,
+				    struct usb_os_desc_header *h, void *data,
+				    unsigned len, void *priv);
+
 static int __must_check ffs_do_single_desc(char *data, unsigned len,
 					   ffs_entity_callback entity,
 					   void *priv)
@@ -1856,11 +1865,191 @@ static int __ffs_data_do_entity(enum ffs_entity_type type,
 	return 0;
 }
 
+static int __ffs_do_os_desc_header(enum ffs_os_desc_type *next_type,
+				   struct usb_os_desc_header *desc)
+{
+	u16 bcd_version = le16_to_cpu(desc->bcdVersion);
+	u16 w_index = le16_to_cpu(desc->wIndex);
+
+	if (bcd_version != 1) {
+		pr_vdebug("unsupported os descriptors version: %d",
+			  bcd_version);
+		return -EINVAL;
+	}
+	switch (w_index) {
+	case 0x4:
+		*next_type = FFS_OS_DESC_EXT_COMPAT;
+		break;
+	case 0x5:
+		*next_type = FFS_OS_DESC_EXT_PROP;
+		break;
+	default:
+		pr_vdebug("unsupported os descriptor type: %d", w_index);
+		return -EINVAL;
+	}
+
+	return sizeof(*desc);
+}
+
+/*
+ * Process all extended compatibility/extended property descriptors
+ * of a feature descriptor
+ */
+static int __must_check ffs_do_single_os_desc(char *data, unsigned len,
+					      enum ffs_os_desc_type type,
+					      u16 feature_count,
+					      ffs_os_desc_callback entity,
+					      void *priv,
+					      struct usb_os_desc_header *h)
+{
+	int ret;
+	const unsigned _len = len;
+
+	ENTER();
+
+	/* loop over all ext compat/ext prop descriptors */
+	while (feature_count--) {
+		ret = entity(type, h, data, len, priv);
+		if (unlikely(ret < 0)) {
+			pr_debug("bad OS descriptor, type: %d\n", type);
+			return ret;
+		}
+		data += ret;
+		len -= ret;
+	}
+	return _len - len;
+}
+
+/* Process a number of complete Feature Descriptors (Ext Compat or Ext Prop) */
+static int __must_check ffs_do_os_descs(unsigned count,
+					char *data, unsigned len,
+					ffs_os_desc_callback entity, void *priv)
+{
+	const unsigned _len = len;
+	unsigned long num = 0;
+
+	ENTER();
+
+	for (num = 0; num < count; ++num) {
+		int ret;
+		enum ffs_os_desc_type type;
+		u16 feature_count;
+		struct usb_os_desc_header *desc = (void *)data;
+
+		if (len < sizeof(*desc))
+			return -EINVAL;
+
+		/*
+		 * Record "descriptor" entity.
+		 * Process dwLength, bcdVersion, wIndex, get b/wCount.
+		 * Move the data pointer to the beginning of extended
+		 * compatibilities proper or extended properties proper
+		 * portions of the data
+		 */
+		if (le32_to_cpu(desc->dwLength) > len)
+			return -EINVAL;
+
+		ret = __ffs_do_os_desc_header(&type, desc);
+		if (unlikely(ret < 0)) {
+			pr_debug("entity OS_DESCRIPTOR(%02lx); ret = %d\n",
+				 num, ret);
+			return ret;
+		}
+		/*
+		 * 16-bit hex "?? 00" Little Endian looks like 8-bit hex "??"
+		 */
+		feature_count = le16_to_cpu(desc->wCount);
+		if (type == FFS_OS_DESC_EXT_COMPAT &&
+		    (feature_count > 255 || desc->Reserved))
+				return -EINVAL;
+		len -= ret;
+		data += ret;
+
+		/*
+		 * Process all function/property descriptors
+		 * of this Feature Descriptor
+		 */
+		ret = ffs_do_single_os_desc(data, len, type,
+					    feature_count, entity, priv, desc);
+		if (unlikely(ret < 0)) {
+			pr_debug("%s returns %d\n", __func__, ret);
+			return ret;
+		}
+
+		len -= ret;
+		data += ret;
+	}
+	return _len - len;
+}
+
+/**
+ * Validate contents of the buffer from userspace related to OS descriptors.
+ */
+static int __ffs_data_do_os_desc(enum ffs_os_desc_type type,
+				 struct usb_os_desc_header *h, void *data,
+				 unsigned len, void *priv)
+{
+	struct ffs_data *ffs = priv;
+	u8 length;
+
+	ENTER();
+
+	switch (type) {
+	case FFS_OS_DESC_EXT_COMPAT: {
+		struct usb_ext_compat_desc *d = data;
+		int i;
+
+		if (len < sizeof(*d) ||
+		    d->bFirstInterfaceNumber >= ffs->interfaces_count ||
+		    d->Reserved1)
+			return -EINVAL;
+		for (i = 0; i < ARRAY_SIZE(d->Reserved2); ++i)
+			if (d->Reserved2[i])
+				return -EINVAL;
+
+		length = sizeof(struct usb_ext_compat_desc);
+	}
+		break;
+	case FFS_OS_DESC_EXT_PROP: {
+		struct usb_ext_prop_desc *d = data;
+		u32 type, pdl;
+		u16 pnl;
+
+		if (len < sizeof(*d) || h->interface >= ffs->interfaces_count)
+			return -EINVAL;
+		length = le32_to_cpu(d->dwSize);
+		type = le32_to_cpu(d->dwPropertyDataType);
+		if (type < USB_EXT_PROP_UNICODE ||
+		    type > USB_EXT_PROP_UNICODE_MULTI) {
+			pr_vdebug("unsupported os descriptor property type: %d",
+				  type);
+			return -EINVAL;
+		}
+		pnl = le16_to_cpu(d->wPropertyNameLength);
+		pdl = le32_to_cpu(*(u32 *)((u8 *)data + 10 + pnl));
+		if (length != 14 + pnl + pdl) {
+			pr_vdebug("invalid os descriptor length: %d pnl:%d pdl:%d (descriptor %d)\n",
+				  length, pnl, pdl, type);
+			return -EINVAL;
+		}
+		++ffs->ms_os_descs_ext_prop_count;
+		/* property name reported to the host as "WCHAR"s */
+		ffs->ms_os_descs_ext_prop_name_len += pnl * 2;
+		ffs->ms_os_descs_ext_prop_data_len += pdl;
+	}
+		break;
+	default:
+		pr_vdebug("unknown descriptor: %d\n", type);
+		return -EINVAL;
+	}
+	return length;
+}
+
 static int __ffs_data_got_descs(struct ffs_data *ffs,
 				char *const _data, size_t len)
 {
 	char *data = _data, *raw_descs;
-	unsigned counts[3], flags;
+	unsigned os_descs_count = 0, counts[3], flags;
 	int ret = -EINVAL, i;
 
 	ENTER();
@@ -1878,7 +2067,8 @@ static int __ffs_data_got_descs(struct ffs_data *ffs,
 		flags = get_unaligned_le32(data + 8);
 		if (flags & ~(FUNCTIONFS_HAS_FS_DESC |
 			      FUNCTIONFS_HAS_HS_DESC |
-			      FUNCTIONFS_HAS_SS_DESC)) {
+			      FUNCTIONFS_HAS_SS_DESC |
+			      FUNCTIONFS_HAS_MS_OS_DESC)) {
 			ret = -ENOSYS;
 			goto error;
 		}
@@ -1901,6 +2091,11 @@ static int __ffs_data_got_descs(struct ffs_data *ffs,
 			len  -= 4;
 		}
 	}
+	if (flags & (1 << i)) {
+		os_descs_count = get_unaligned_le32(data);
+		data += 4;
+		len -= 4;
+	};
 
 	/* Read descriptors */
 	raw_descs = data;
@@ -1914,6 +2109,14 @@ static int __ffs_data_got_descs(struct ffs_data *ffs,
 		data += ret;
 		len  -= ret;
 	}
+	if (os_descs_count) {
+		ret = ffs_do_os_descs(os_descs_count, data, len,
+				      __ffs_data_do_os_desc, ffs);
+		if (ret < 0)
+			goto error;
+		data += ret;
+		len -= ret;
+	}
 
 	if (raw_descs == data || len) {
 		ret = -EINVAL;
@@ -1926,6 +2129,7 @@ static int __ffs_data_got_descs(struct ffs_data *ffs,
 	ffs->fs_descs_count	= counts[0];
 	ffs->hs_descs_count	= counts[1];
 	ffs->ss_descs_count	= counts[2];
+	ffs->ms_os_descs_count	= os_descs_count;
 
 	return 0;
 
@@ -2267,6 +2471,85 @@ static int __ffs_func_bind_do_nums(enum ffs_entity_type type, u8 *valuep,
 	return 0;
 }
 
+static int __ffs_func_bind_do_os_desc(enum ffs_os_desc_type type,
+				      struct usb_os_desc_header *h, void *data,
+				      unsigned len, void *priv)
+{
+	struct ffs_function *func = priv;
+	u8 length = 0;
+
+	switch (type) {
+	case FFS_OS_DESC_EXT_COMPAT: {
+		struct usb_ext_compat_desc *desc = data;
+		struct usb_os_desc_table *t;
+
+		t = &func->function.os_desc_table[desc->bFirstInterfaceNumber];
+		t->if_id = func->interfaces_nums[desc->bFirstInterfaceNumber];
+		memcpy(t->os_desc->ext_compat_id, &desc->CompatibleID,
+		       ARRAY_SIZE(desc->CompatibleID) +
+		       ARRAY_SIZE(desc->SubCompatibleID));
+		length = sizeof(*desc);
+	}
+		break;
+	case FFS_OS_DESC_EXT_PROP: {
+		struct usb_ext_prop_desc *desc = data;
+		struct usb_os_desc_table *t;
+		struct usb_os_desc_ext_prop *ext_prop;
+		char *ext_prop_name;
+		char *ext_prop_data;
+
+		t = &func->function.os_desc_table[h->interface];
+		t->if_id = func->interfaces_nums[h->interface];
+
+		ext_prop = func->ffs->ms_os_descs_ext_prop_avail;
+		func->ffs->ms_os_descs_ext_prop_avail += sizeof(*ext_prop);
+
+		ext_prop->type = le32_to_cpu(desc->dwPropertyDataType);
+		ext_prop->name_len = le16_to_cpu(desc->wPropertyNameLength);
+		ext_prop->data_len = le32_to_cpu(*(u32 *)
+			usb_ext_prop_data_len_ptr(data, ext_prop->name_len));
+		length = ext_prop->name_len + ext_prop->data_len + 14;
+
+		ext_prop_name = func->ffs->ms_os_descs_ext_prop_name_avail;
+		func->ffs->ms_os_descs_ext_prop_name_avail +=
+			ext_prop->name_len;
+
+		ext_prop_data = func->ffs->ms_os_descs_ext_prop_data_avail;
+		func->ffs->ms_os_descs_ext_prop_data_avail +=
+			ext_prop->data_len;
+		memcpy(ext_prop_data,
+		       usb_ext_prop_data_ptr(data, ext_prop->name_len),
+		       ext_prop->data_len);
+		/* unicode data reported to the host as "WCHAR"s */
+		switch (ext_prop->type) {
+		case USB_EXT_PROP_UNICODE:
+		case USB_EXT_PROP_UNICODE_ENV:
+		case USB_EXT_PROP_UNICODE_LINK:
+		case USB_EXT_PROP_UNICODE_MULTI:
+			ext_prop->data_len *= 2;
+			break;
+		}
+		ext_prop->data = ext_prop_data;
+
+		memcpy(ext_prop_name, usb_ext_prop_name_ptr(data),
+		       ext_prop->name_len);
+		/* property name reported to the host as "WCHAR"s */
+		ext_prop->name_len *= 2;
+		ext_prop->name = ext_prop_name;
+
+		t->os_desc->ext_prop_len +=
+			ext_prop->name_len + ext_prop->data_len + 14;
+		++t->os_desc->ext_prop_count;
+		list_add_tail(&ext_prop->entry, &t->os_desc->ext_prop);
+	}
+		break;
+	default:
+		pr_vdebug("unknown descriptor: %d\n", type);
+	}
+
+	return length;
+}
+
 static inline struct f_fs_opts *ffs_do_functionfs_bind(struct usb_function *f,
 						struct usb_configuration *c)
 {
@@ -2328,7 +2611,7 @@ static int _ffs_func_bind(struct usb_configuration *c,
 	const int super = gadget_is_superspeed(func->gadget) &&
 		func->ffs->ss_descs_count;
 
-	int fs_len, hs_len, ret;
+	int fs_len, hs_len, ss_len, ret, i;
 
 	/* Make it a single chunk, less management later on */
 	vla_group(d);
@@ -2340,6 +2623,18 @@ static int _ffs_func_bind(struct usb_configuration *c,
 	vla_item_with_sz(d, struct usb_descriptor_header *, ss_descs,
 		super ? ffs->ss_descs_count + 1 : 0);
 	vla_item_with_sz(d, short, inums, ffs->interfaces_count);
+	vla_item_with_sz(d, struct usb_os_desc_table, os_desc_table,
+			 c->cdev->use_os_string ? ffs->interfaces_count : 0);
+	vla_item_with_sz(d, char[16], ext_compat,
+			 c->cdev->use_os_string ? ffs->interfaces_count : 0);
+	vla_item_with_sz(d, struct usb_os_desc, os_desc,
+			 c->cdev->use_os_string ? ffs->interfaces_count : 0);
+	vla_item_with_sz(d, struct usb_os_desc_ext_prop, ext_prop,
+			 ffs->ms_os_descs_ext_prop_count);
+	vla_item_with_sz(d, char, ext_prop_name,
+			 ffs->ms_os_descs_ext_prop_name_len);
+	vla_item_with_sz(d, char, ext_prop_data,
+			 ffs->ms_os_descs_ext_prop_data_len);
 	vla_item_with_sz(d, char, raw_descs, ffs->raw_descs_length);
 	char *vlabuf;
 
@@ -2350,12 +2645,16 @@ static int _ffs_func_bind(struct usb_configuration *c,
 		return -ENOTSUPP;
 
 	/* Allocate a single chunk, less management later on */
-	vlabuf = kmalloc(vla_group_size(d), GFP_KERNEL);
+	vlabuf = kzalloc(vla_group_size(d), GFP_KERNEL);
 	if (unlikely(!vlabuf))
 		return -ENOMEM;
 
-	/* Zero */
-	memset(vla_ptr(vlabuf, d, eps), 0, d_eps__sz);
+	ffs->ms_os_descs_ext_prop_avail = vla_ptr(vlabuf, d, ext_prop);
+	ffs->ms_os_descs_ext_prop_name_avail =
+		vla_ptr(vlabuf, d, ext_prop_name);
+	ffs->ms_os_descs_ext_prop_data_avail =
+		vla_ptr(vlabuf, d, ext_prop_data);
+
 	/* Copy descriptors  */
 	memcpy(vla_ptr(vlabuf, d, raw_descs), ffs->raw_descs,
 	       ffs->raw_descs_length);
@@ -2409,12 +2708,16 @@ static int _ffs_func_bind(struct usb_configuration *c,
 
 	if (likely(super)) {
 		func->function.ss_descriptors = vla_ptr(vlabuf, d, ss_descs);
-		ret = ffs_do_descs(ffs->ss_descs_count,
+		ss_len = ffs_do_descs(ffs->ss_descs_count,
 				vla_ptr(vlabuf, d, raw_descs) + fs_len + hs_len,
 				d_raw_descs__sz - fs_len - hs_len,
 				__ffs_func_bind_do_descs, func);
-		if (unlikely(ret < 0))
+		if (unlikely(ss_len < 0)) {
+			ret = ss_len;
 			goto error;
+		}
+	} else {
+		ss_len = 0;
 	}
 
 	/*
@@ -2430,6 +2733,28 @@ static int _ffs_func_bind(struct usb_configuration *c,
 	if (unlikely(ret < 0))
 		goto error;
 
+	func->function.os_desc_table = vla_ptr(vlabuf, d, os_desc_table);
+	if (c->cdev->use_os_string)
+		for (i = 0; i < ffs->interfaces_count; ++i) {
+			struct usb_os_desc *desc;
+
+			desc = func->function.os_desc_table[i].os_desc =
+				vla_ptr(vlabuf, d, os_desc) +
+				i * sizeof(struct usb_os_desc);
+			desc->ext_compat_id =
+				vla_ptr(vlabuf, d, ext_compat) + i * 16;
+			INIT_LIST_HEAD(&desc->ext_prop);
+		}
+	ret = ffs_do_os_descs(ffs->ms_os_descs_count,
+			      vla_ptr(vlabuf, d, raw_descs) +
+			      fs_len + hs_len + ss_len,
+			      d_raw_descs__sz - fs_len - hs_len - ss_len,
+			      __ffs_func_bind_do_os_desc, func);
+	if (unlikely(ret < 0))
+		goto error;
+	func->function.os_desc_n =
+		c->cdev->use_os_string ? ffs->interfaces_count : 0;
+
 	/* And we're done */
 	ffs_event_add(ffs, FUNCTIONFS_BIND);
 	return 0;
diff --git a/drivers/usb/gadget/u_fs.h b/drivers/usb/gadget/u_fs.h
index bf0ba375d459..63d6e71569c1 100644
--- a/drivers/usb/gadget/u_fs.h
+++ b/drivers/usb/gadget/u_fs.h
@@ -216,6 +216,13 @@ struct ffs_data {
 	unsigned			fs_descs_count;
 	unsigned			hs_descs_count;
 	unsigned			ss_descs_count;
+	unsigned			ms_os_descs_count;
+	unsigned			ms_os_descs_ext_prop_count;
+	unsigned			ms_os_descs_ext_prop_name_len;
+	unsigned			ms_os_descs_ext_prop_data_len;
+	void				*ms_os_descs_ext_prop_avail;
+	void				*ms_os_descs_ext_prop_name_avail;
+	void				*ms_os_descs_ext_prop_data_avail;
 
 	unsigned short			strings_count;
 	unsigned short			interfaces_count;
diff --git a/include/uapi/linux/usb/functionfs.h b/include/uapi/linux/usb/functionfs.h
index 2a4b4a72a4f9..b66fae77c08c 100644
--- a/include/uapi/linux/usb/functionfs.h
+++ b/include/uapi/linux/usb/functionfs.h
@@ -18,10 +18,9 @@ enum functionfs_flags {
 	FUNCTIONFS_HAS_FS_DESC = 1,
 	FUNCTIONFS_HAS_HS_DESC = 2,
 	FUNCTIONFS_HAS_SS_DESC = 4,
+	FUNCTIONFS_HAS_MS_OS_DESC = 8,
 };
 
-#ifndef __KERNEL__
-
 /* Descriptor of an non-audio endpoint */
 struct usb_endpoint_descriptor_no_audio {
 	__u8  bLength;
@@ -33,6 +32,36 @@ struct usb_endpoint_descriptor_no_audio {
 	__u8  bInterval;
 } __attribute__((packed));
 
+/* MS OS Descriptor header */
+struct usb_os_desc_header {
+	__u8	interface;
+	__le32	dwLength;
+	__le16	bcdVersion;
+	__le16	wIndex;
+	union {
+		struct {
+			__u8	bCount;
+			__u8	Reserved;
+		};
+		__le16	wCount;
+	};
+} __attribute__((packed));
+
+struct usb_ext_compat_desc {
+	__u8	bFirstInterfaceNumber;
+	__u8	Reserved1;
+	__u8	CompatibleID[8];
+	__u8	SubCompatibleID[8];
+	__u8	Reserved2[6];
+};
+
+struct usb_ext_prop_desc {
+	__le32	dwSize;
+	__le32	dwPropertyDataType;
+	__le16	wPropertyNameLength;
+} __attribute__((packed));
+
+#ifndef __KERNEL__
 
 /*
  * Descriptors format:
@@ -45,9 +74,11 @@ struct usb_endpoint_descriptor_no_audio {
  * |     | fs_count  | LE32         | number of full-speed descriptors     |
  * |     | hs_count  | LE32         | number of high-speed descriptors     |
  * |     | ss_count  | LE32         | number of super-speed descriptors    |
+ * |     | os_count  | LE32         | number of MS OS descriptors          |
  * |     | fs_descrs | Descriptor[] | list of full-speed descriptors       |
  * |     | hs_descrs | Descriptor[] | list of high-speed descriptors       |
  * |     | ss_descrs | Descriptor[] | list of super-speed descriptors      |
+ * |     | os_descrs | OSDesc[]     | list of MS OS descriptors            |
  *
  * Depending on which flags are set, various fields may be missing in the
  * structure.  Any flags that are not recognised cause the whole block to be
@@ -74,6 +105,52 @@ struct usb_endpoint_descriptor_no_audio {
  * |   0 | bLength         | U8   | length of the descriptor |
  * |   1 | bDescriptorType | U8   | descriptor type          |
  * |   2 | payload         |      | descriptor's payload     |
+ *
+ * OSDesc[] is an array of valid MS OS Feature Descriptors which have one of
+ * the following formats:
+ *
+ * | off | name            | type | description              |
+ * |-----+-----------------+------+--------------------------|
+ * |   0 | inteface        | U8   | related interface number |
+ * |   1 | dwLength        | U32  | length of the descriptor |
+ * |   5 | bcdVersion      | U16  | currently supported: 1   |
+ * |   7 | wIndex          | U16  | currently supported: 4   |
+ * |   9 | bCount          | U8   | number of ext. compat.   |
+ * |  10 | Reserved        | U8   | 0                        |
+ * |  11 | ExtCompat[]     |      | list of ext. compat. d.  |
+ *
+ * | off | name            | type | description              |
+ * |-----+-----------------+------+--------------------------|
+ * |   0 | inteface        | U8   | related interface number |
+ * |   1 | dwLength        | U32  | length of the descriptor |
+ * |   5 | bcdVersion      | U16  | currently supported: 1   |
+ * |   7 | wIndex          | U16  | currently supported: 5   |
+ * |   9 | wCount          | U16  | number of ext. compat.   |
+ * |  11 | ExtProp[]       |      | list of ext. prop. d.    |
+ *
+ * ExtCompat[] is an array of valid Extended Compatiblity descriptors
+ * which have the following format:
+ *
+ * | off | name                  | type | description                         |
+ * |-----+-----------------------+------+-------------------------------------|
+ * |   0 | bFirstInterfaceNumber | U8   | index of the interface or of the 1st|
+ * |     |                       |      | interface in an IAD group           |
+ * |   1 | Reserved              | U8   | 0                                   |
+ * |   2 | CompatibleID          | U8[8]| compatible ID string                |
+ * |  10 | SubCompatibleID       | U8[8]| subcompatible ID string             |
+ * |  18 | Reserved              | U8[6]| 0                                   |
+ *
+ * ExtProp[] is an array of valid Extended Properties descriptors
+ * which have the following format:
+ *
+ * | off | name                  | type | description                         |
+ * |-----+-----------------------+------+-------------------------------------|
+ * |   0 | dwSize                | U32  | length of the descriptor            |
+ * |   4 | dwPropertyDataType    | U32  | 1..7                                |
+ * |   8 | wPropertyNameLength   | U16  | bPropertyName length (NL)           |
+ * |  10 | bPropertyName         |U8[NL]| name of this property               |
+ * |10+NL| dwPropertyDataLength  | U32  | bPropertyData length (DL)           |
+ * |14+NL| bProperty             |U8[DL]| payload of this property            |
  */
 
 struct usb_functionfs_strings_head {
-- 
cgit v1.2.3


From bc91b0f07ada5535427373a4e2050877bcc12218 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@resnulli.us>
Date: Fri, 11 Jul 2014 21:10:18 +0200
Subject: ipv6: addrconf: implement address generation modes

This patch introduces a possibility for userspace to set various (so far
two) modes of generating addresses. This is useful for example for
NetworkManager because it can set the mode to NONE and take care of link
local addresses itself. That allow it to have the interface up,
monitoring carrier but still don't have any addresses on it.

One more use-case by Dan Williams:
<quote>
WWAN devices often have their LL address provided by the firmware of the
device, which sometimes refuses to respond to incorrect LL addresses
when doing DHCPv6 or IPv6 ND.  The kernel cannot generate the correct LL
address for two reasons:

1) WWAN pseudo-ethernet interfaces often construct a fake MAC address,
or read a meaningless MAC address from the firmware.  Thus the EUI64 and
the IPv6LL address the kernel assigns will be wrong.  The real LL
address is often retrieved from the firmware with AT or proprietary
commands.

2) WWAN PPP interfaces receive their LL address from IPV6CP, not from
kernel assignments.  Only after IPV6CP has completed do we know the LL
address of the PPP interface and its peer.  But the kernel has already
assigned an incorrect LL address to the interface.

So being able to suppress the kernel LL address generation and assign
the one retrieved from the firmware is less complicated and more robust.
</quote>

Signed-off-by: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/if_inet6.h       |  1 +
 include/uapi/linux/if_link.h |  6 +++++
 net/ipv6/addrconf.c          | 56 ++++++++++++++++++++++++++++++--------------
 3 files changed, 45 insertions(+), 18 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h
index b4956a5fcc3f..d07b1a64b4e7 100644
--- a/include/net/if_inet6.h
+++ b/include/net/if_inet6.h
@@ -205,6 +205,7 @@ struct inet6_dev {
 	struct timer_list	rs_timer;
 	__u8			rs_probes;
 
+	__u8			addr_gen_mode;
 	unsigned long		tstamp; /* ipv6InterfaceTable update timestamp */
 	struct rcu_head		rcu;
 };
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index b38534895db5..ff957604a721 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -204,11 +204,17 @@ enum {
 	IFLA_INET6_CACHEINFO,	/* time values and max reasm size */
 	IFLA_INET6_ICMP6STATS,	/* statistics (icmpv6)		*/
 	IFLA_INET6_TOKEN,	/* device token			*/
+	IFLA_INET6_ADDR_GEN_MODE, /* implicit address generator mode */
 	__IFLA_INET6_MAX
 };
 
 #define IFLA_INET6_MAX	(__IFLA_INET6_MAX - 1)
 
+enum in6_addr_gen_mode {
+	IN6_ADDR_GEN_MODE_EUI64,
+	IN6_ADDR_GEN_MODE_NONE,
+};
+
 enum {
 	BRIDGE_MODE_UNSPEC,
 	BRIDGE_MODE_HAIRPIN,
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 358edd2272ac..4c03c2843094 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -2730,9 +2730,25 @@ static void addrconf_add_linklocal(struct inet6_dev *idev, const struct in6_addr
 	}
 }
 
+static void addrconf_addr_gen(struct inet6_dev *idev, bool prefix_route)
+{
+	if (idev->addr_gen_mode == IN6_ADDR_GEN_MODE_EUI64) {
+		struct in6_addr addr;
+
+		ipv6_addr_set(&addr,  htonl(0xFE800000), 0, 0, 0);
+		/* addrconf_add_linklocal also adds a prefix_route and we
+		 * only need to care about prefix routes if ipv6_generate_eui64
+		 * couldn't generate one.
+		 */
+		if (ipv6_generate_eui64(addr.s6_addr + 8, idev->dev) == 0)
+			addrconf_add_linklocal(idev, &addr);
+		else if (prefix_route)
+			addrconf_prefix_route(&addr, 64, idev->dev, 0, 0);
+	}
+}
+
 static void addrconf_dev_config(struct net_device *dev)
 {
-	struct in6_addr addr;
 	struct inet6_dev *idev;
 
 	ASSERT_RTNL();
@@ -2753,11 +2769,7 @@ static void addrconf_dev_config(struct net_device *dev)
 	if (IS_ERR(idev))
 		return;
 
-	memset(&addr, 0, sizeof(struct in6_addr));
-	addr.s6_addr32[0] = htonl(0xFE800000);
-
-	if (ipv6_generate_eui64(addr.s6_addr + 8, dev) == 0)
-		addrconf_add_linklocal(idev, &addr);
+	addrconf_addr_gen(idev, false);
 }
 
 #if IS_ENABLED(CONFIG_IPV6_SIT)
@@ -2779,11 +2791,7 @@ static void addrconf_sit_config(struct net_device *dev)
 	}
 
 	if (dev->priv_flags & IFF_ISATAP) {
-		struct in6_addr addr;
-
-		ipv6_addr_set(&addr,  htonl(0xFE800000), 0, 0, 0);
-		if (!ipv6_generate_eui64(addr.s6_addr + 8, dev))
-			addrconf_add_linklocal(idev, &addr);
+		addrconf_addr_gen(idev, false);
 		return;
 	}
 
@@ -2798,7 +2806,6 @@ static void addrconf_sit_config(struct net_device *dev)
 static void addrconf_gre_config(struct net_device *dev)
 {
 	struct inet6_dev *idev;
-	struct in6_addr addr;
 
 	ASSERT_RTNL();
 
@@ -2807,11 +2814,7 @@ static void addrconf_gre_config(struct net_device *dev)
 		return;
 	}
 
-	ipv6_addr_set(&addr,  htonl(0xFE800000), 0, 0, 0);
-	if (!ipv6_generate_eui64(addr.s6_addr + 8, dev))
-		addrconf_add_linklocal(idev, &addr);
-	else
-		addrconf_prefix_route(&addr, 64, dev, 0, 0);
+	addrconf_addr_gen(idev, true);
 }
 #endif
 
@@ -4423,6 +4426,10 @@ static int inet6_fill_ifla6_attrs(struct sk_buff *skb, struct inet6_dev *idev)
 	nla = nla_reserve(skb, IFLA_INET6_TOKEN, sizeof(struct in6_addr));
 	if (nla == NULL)
 		goto nla_put_failure;
+
+	if (nla_put_u8(skb, IFLA_INET6_ADDR_GEN_MODE, idev->addr_gen_mode))
+		goto nla_put_failure;
+
 	read_lock_bh(&idev->lock);
 	memcpy(nla_data(nla), idev->token.s6_addr, nla_len(nla));
 	read_unlock_bh(&idev->lock);
@@ -4527,8 +4534,21 @@ static int inet6_set_link_af(struct net_device *dev, const struct nlattr *nla)
 	if (nla_parse_nested(tb, IFLA_INET6_MAX, nla, NULL) < 0)
 		BUG();
 
-	if (tb[IFLA_INET6_TOKEN])
+	if (tb[IFLA_INET6_TOKEN]) {
 		err = inet6_set_iftoken(idev, nla_data(tb[IFLA_INET6_TOKEN]));
+		if (err)
+			return err;
+	}
+
+	if (tb[IFLA_INET6_ADDR_GEN_MODE]) {
+		u8 mode = nla_get_u8(tb[IFLA_INET6_ADDR_GEN_MODE]);
+
+		if (mode != IN6_ADDR_GEN_MODE_EUI64 &&
+		    mode != IN6_ADDR_GEN_MODE_NONE)
+			return -EINVAL;
+		idev->addr_gen_mode = mode;
+		err = 0;
+	}
 
 	return err;
 }
-- 
cgit v1.2.3


From f736d9985e69e72d9a2ebfd131a72ee8f870c6f3 Mon Sep 17 00:00:00 2001
From: Nikita Edward Baruzdin <nebaruzdin@gmail.com>
Date: Fri, 11 Jul 2014 16:13:21 +0400
Subject: can: netlink: Remove space before tab

Fixes the corresponing checkpatch.pl warning.

Signed-off-by: Nikita Edward Baruzdin <nebaruzdin@gmail.com>
Acked-by: Oliver Hartkopp <socketcan@hartkopp.net>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 include/uapi/linux/can/netlink.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/can/netlink.h b/include/uapi/linux/can/netlink.h
index 813d11f54977..3bbf5c7e1500 100644
--- a/include/uapi/linux/can/netlink.h
+++ b/include/uapi/linux/can/netlink.h
@@ -92,7 +92,7 @@ struct can_ctrlmode {
 };
 
 #define CAN_CTRLMODE_LOOPBACK		0x01	/* Loopback mode */
-#define CAN_CTRLMODE_LISTENONLY		0x02 	/* Listen-only mode */
+#define CAN_CTRLMODE_LISTENONLY		0x02	/* Listen-only mode */
 #define CAN_CTRLMODE_3_SAMPLES		0x04	/* Triple sampling mode */
 #define CAN_CTRLMODE_ONE_SHOT		0x08	/* One-Shot mode */
 #define CAN_CTRLMODE_BERR_REPORTING	0x10	/* Bus-error reporting */
-- 
cgit v1.2.3


From 4b9e1bab12c9b6de965268c2fbe6ebbb35dddd89 Mon Sep 17 00:00:00 2001
From: Nikita Edward Baruzdin <nebaruzdin@gmail.com>
Date: Fri, 11 Jul 2014 16:13:22 +0400
Subject: can: netlink: Add CAN_CTRLMODE_PRESUME_ACK flag

Most CAN controllers have a support for ignoring ACK absence. Some of
them refer to this feature as a self test mode (e. g. SJA1000) and some
include it as a part of a loopback mode (e. g. MCP2510).

Setting the introduced flag via netlink should make CAN controller
perform a successful transmission, even if there is no acknowledgement
(dominant ACK bit) received.

Signed-off-by: Nikita Edward Baruzdin <nebaruzdin@gmail.com>
Acked-by: Oliver Hartkopp <socketcan@hartkopp.net>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 include/uapi/linux/can/netlink.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/can/netlink.h b/include/uapi/linux/can/netlink.h
index 3bbf5c7e1500..3e4323a3918d 100644
--- a/include/uapi/linux/can/netlink.h
+++ b/include/uapi/linux/can/netlink.h
@@ -97,6 +97,7 @@ struct can_ctrlmode {
 #define CAN_CTRLMODE_ONE_SHOT		0x08	/* One-Shot mode */
 #define CAN_CTRLMODE_BERR_REPORTING	0x10	/* Bus-error reporting */
 #define CAN_CTRLMODE_FD			0x20	/* CAN FD mode */
+#define CAN_CTRLMODE_PRESUME_ACK	0x40	/* Ignore missing CAN ACKs */
 
 /*
  * CAN device statistics
-- 
cgit v1.2.3


From 685343fc3ba61a1f6eef361b786601123db16c28 Mon Sep 17 00:00:00 2001
From: Tom Gundersen <teg@jklm.no>
Date: Mon, 14 Jul 2014 16:37:22 +0200
Subject: net: add name_assign_type netdev attribute

Based on a patch by David Herrmann.

The name_assign_type attribute gives hints where the interface name of a
given net-device comes from. These values are currently defined:
  NET_NAME_ENUM:
    The ifname is provided by the kernel with an enumerated
    suffix, typically based on order of discovery. Names may
    be reused and unpredictable.
  NET_NAME_PREDICTABLE:
    The ifname has been assigned by the kernel in a predictable way
    that is guaranteed to avoid reuse and always be the same for a
    given device. Examples include statically created devices like
    the loopback device and names deduced from hardware properties
    (including being given explicitly by the firmware). Names
    depending on the order of discovery, or in any other way on the
    existence of other devices, must not be marked as PREDICTABLE.
  NET_NAME_USER:
    The ifname was provided by user-space during net-device setup.
  NET_NAME_RENAMED:
    The net-device has been renamed from userspace. Once this type is set,
    it cannot change again.
  NET_NAME_UNKNOWN:
    This is an internal placeholder to indicate that we yet haven't yet
    categorized the name. It will not be exposed to userspace, rather
    -EINVAL is returned.

The aim of these patches is to improve user-space renaming of interfaces. As
a general rule, userspace must rename interfaces to guarantee that names stay
the same every time a given piece of hardware appears (at boot, or when
attaching it). However, there are several situations where userspace should
not perform the renaming, and that depends on both the policy of the local
admin, but crucially also on the nature of the current interface name.

If an interface was created in repsonse to a userspace request, and userspace
already provided a name, we most probably want to leave that name alone. The
main instance of this is wifi-P2P devices created over nl80211, which currently
have a long-standing bug where they are getting renamed by udev. We label such
names NET_NAME_USER.

If an interface, unbeknown to us, has already been renamed from userspace, we
most probably want to leave also that alone. This will typically happen when
third-party plugins (for instance to udev, but the interface is generic so could
be from anywhere) renames the interface without informing udev about it. A
typical situation is when you switch root from an installer or an initrd to the
real system and the new instance of udev does not know what happened before
the switch. These types of problems have caused repeated issues in the past. To
solve this, once an interface has been renamed, its name is labelled
NET_NAME_RENAMED.

In many cases, the kernel is actually able to name interfaces in such a
way that there is no need for userspace to rename them. This is the case when
the enumeration order of devices, or in fact any other (non-parent) device on
the system, can not influence the name of the interface. Examples include
statically created devices, or any naming schemes based on hardware properties
of the interface. In this case the admin may prefer to use the kernel-provided
names, and to make that possible we label such names NET_NAME_PREDICTABLE.
We want the kernel to have tho possibilty of performing predictable interface
naming itself (and exposing to userspace that it has), as the information
necessary for a proper naming scheme for a certain class of devices may not
be exposed to userspace.

The case where renaming is almost certainly desired, is when the kernel has
given the interface a name using global device enumeration based on order of
discovery (ethX, wlanY, etc). These naming schemes are labelled NET_NAME_ENUM.

Lastly, a fallback is left as NET_NAME_UNKNOWN, to indicate that a driver has
not yet been ported. This is mostly useful as a transitionary measure, allowing
us to label the various naming schemes bit by bit.

v8: minor documentation fixes
v9: move comment to the right commit

Signed-off-by: Tom Gundersen <teg@jklm.no>
Reviewed-by: David Herrmann <dh.herrmann@gmail.com>
Reviewed-by: Kay Sievers <kay@vrfy.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/ABI/testing/sysfs-class-net | 11 +++++++++++
 include/linux/netdevice.h                 |  2 ++
 include/uapi/linux/netdevice.h            |  6 ++++++
 net/core/net-sysfs.c                      | 20 ++++++++++++++++++++
 4 files changed, 39 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/Documentation/ABI/testing/sysfs-class-net b/Documentation/ABI/testing/sysfs-class-net
index 416c5d59f52e..d322b0581194 100644
--- a/Documentation/ABI/testing/sysfs-class-net
+++ b/Documentation/ABI/testing/sysfs-class-net
@@ -1,3 +1,14 @@
+What:		/sys/class/net/<iface>/name_assign_type
+Date:		July 2014
+KernelVersion:	3.17
+Contact:	netdev@vger.kernel.org
+Description:
+		Indicates the name assignment type. Possible values are:
+		1: enumerated by the kernel, possibly in an unpredictable way
+		2: predictably named by the kernel
+		3: named by userspace
+		4: renamed
+
 What:		/sys/class/net/<iface>/addr_assign_type
 Date:		July 2010
 KernelVersion:	3.2
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 3a320db96180..9be34732142f 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1381,6 +1381,8 @@ struct net_device {
 	struct kset		*queues_kset;
 #endif
 
+	unsigned char		name_assign_type;
+
 	bool			uc_promisc;
 	unsigned int		promiscuity;
 	unsigned int		allmulti;
diff --git a/include/uapi/linux/netdevice.h b/include/uapi/linux/netdevice.h
index fdfbd1c17065..55818543342d 100644
--- a/include/uapi/linux/netdevice.h
+++ b/include/uapi/linux/netdevice.h
@@ -37,6 +37,12 @@
 #define INIT_NETDEV_GROUP	0
 
 
+/* interface name assignment types (sysfs name_assign_type attribute) */
+#define NET_NAME_UNKNOWN	0	/* unknown origin (not exposed to userspace) */
+#define NET_NAME_ENUM		1	/* enumerated by kernel */
+#define NET_NAME_PREDICTABLE	2	/* predictably named by the kernel */
+#define NET_NAME_USER		3	/* provided by user-space */
+#define NET_NAME_RENAMED	4	/* renamed by user-space */
 
 /* Media selection options. */
 enum {
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 1cac29ebb05b..7752f2ad49a5 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -112,6 +112,25 @@ NETDEVICE_SHOW_RO(ifindex, fmt_dec);
 NETDEVICE_SHOW_RO(type, fmt_dec);
 NETDEVICE_SHOW_RO(link_mode, fmt_dec);
 
+static ssize_t format_name_assign_type(const struct net_device *net, char *buf)
+{
+	return sprintf(buf, fmt_dec, net->name_assign_type);
+}
+
+static ssize_t name_assign_type_show(struct device *dev,
+				     struct device_attribute *attr,
+				     char *buf)
+{
+	struct net_device *net = to_net_dev(dev);
+	ssize_t ret = -EINVAL;
+
+	if (net->name_assign_type != NET_NAME_UNKNOWN)
+		ret = netdev_show(dev, attr, buf, format_name_assign_type);
+
+	return ret;
+}
+static DEVICE_ATTR_RO(name_assign_type);
+
 /* use same locking rules as GIFHWADDR ioctl's */
 static ssize_t address_show(struct device *dev, struct device_attribute *attr,
 			    char *buf)
@@ -387,6 +406,7 @@ static struct attribute *net_class_attrs[] = {
 	&dev_attr_dev_port.attr,
 	&dev_attr_iflink.attr,
 	&dev_attr_ifindex.attr,
+	&dev_attr_name_assign_type.attr,
 	&dev_attr_addr_assign_type.attr,
 	&dev_attr_addr_len.attr,
 	&dev_attr_link_mode.attr,
-- 
cgit v1.2.3


From 63b949382c5f263746b1c177f6ff84de2201ae9d Mon Sep 17 00:00:00 2001
From: Geir Ola Vaagland <geirola@gmail.com>
Date: Sat, 12 Jul 2014 20:30:36 +0200
Subject: net: sctp: implement rfc6458, 5.3.4. SCTP_SNDINFO cmsg support

This patch implements section 5.3.4. of RFC6458, that is, support
for 'SCTP Send Information Structure' (SCTP_SNDINFO) which can be
placed into ancillary data cmsghdr structure for sendmsg() calls.

The sctp_sndinfo structure is defined as per RFC as below ...

  struct sctp_sndinfo {
    uint16_t snd_sid;
    uint16_t snd_flags;
    uint32_t snd_ppid;
    uint32_t snd_context;
    sctp_assoc_t snd_assoc_id;
  };

... and supplied under cmsg_level IPPROTO_SCTP, cmsg_type
SCTP_SNDINFO, while cmsg_data[] contains struct sctp_sndinfo.
An sctp_sndinfo item always corresponds to the data in msg_iov.

Joint work with Daniel Borkmann.

Signed-off-by: Geir Ola Vaagland <geirola@gmail.com>
Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/structs.h |  3 +-
 include/uapi/linux/sctp.h  | 22 +++++++++++--
 net/sctp/socket.c          | 77 ++++++++++++++++++++++++++++++++++------------
 3 files changed, 79 insertions(+), 23 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index f38588bf3462..7af9a0f5d8ce 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -1919,7 +1919,8 @@ struct sctp_chunk *sctp_get_ecne_prepend(struct sctp_association *asoc);
 /* A convenience structure to parse out SCTP specific CMSGs. */
 typedef struct sctp_cmsgs {
 	struct sctp_initmsg *init;
-	struct sctp_sndrcvinfo *info;
+	struct sctp_sndrcvinfo *srinfo;
+	struct sctp_sndinfo *sinfo;
 } sctp_cmsgs_t;
 
 /* Structure for tracking memory objects */
diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h
index 266022a2be4a..a387761f7e02 100644
--- a/include/uapi/linux/sctp.h
+++ b/include/uapi/linux/sctp.h
@@ -154,6 +154,22 @@ struct sctp_sndrcvinfo {
 	sctp_assoc_t sinfo_assoc_id;
 };
 
+/* 5.3.4 SCTP Send Information Structure (SCTP_SNDINFO)
+ *
+ *   This cmsghdr structure specifies SCTP options for sendmsg().
+ *
+ *   cmsg_level    cmsg_type      cmsg_data[]
+ *   ------------  ------------   -------------------
+ *   IPPROTO_SCTP  SCTP_SNDINFO   struct sctp_sndinfo
+ */
+struct sctp_sndinfo {
+	__u16 snd_sid;
+	__u16 snd_flags;
+	__u32 snd_ppid;
+	__u32 snd_context;
+	sctp_assoc_t snd_assoc_id;
+};
+
 /*
  *  sinfo_flags: 16 bits (unsigned integer)
  *
@@ -177,10 +193,12 @@ typedef union {
 
 /* These are cmsg_types.  */
 typedef enum sctp_cmsg_type {
-	SCTP_INIT,              /* 5.2.1 SCTP Initiation Structure */
+	SCTP_INIT,		/* 5.2.1 SCTP Initiation Structure */
 #define SCTP_INIT	SCTP_INIT
-	SCTP_SNDRCV,            /* 5.2.2 SCTP Header Information Structure */
+	SCTP_SNDRCV,		/* 5.2.2 SCTP Header Information Structure */
 #define SCTP_SNDRCV	SCTP_SNDRCV
+	SCTP_SNDINFO,		/* 5.3.4 SCTP Send Information Structure */
+#define SCTP_SNDINFO	SCTP_SNDINFO
 } sctp_cmsg_t;
 
 /*
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 429899689408..d61729e99856 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -1602,12 +1602,13 @@ static int sctp_sendmsg(struct kiocb *iocb, struct sock *sk,
 	struct sctp_initmsg *sinit;
 	sctp_assoc_t associd = 0;
 	sctp_cmsgs_t cmsgs = { NULL };
-	int err;
 	sctp_scope_t scope;
-	long timeo;
-	__u16 sinfo_flags = 0;
+	bool fill_sinfo_ttl = false;
 	struct sctp_datamsg *datamsg;
 	int msg_flags = msg->msg_flags;
+	__u16 sinfo_flags = 0;
+	long timeo;
+	int err;
 
 	err = 0;
 	sp = sctp_sk(sk);
@@ -1648,10 +1649,21 @@ static int sctp_sendmsg(struct kiocb *iocb, struct sock *sk,
 		msg_name = msg->msg_name;
 	}
 
-	sinfo = cmsgs.info;
 	sinit = cmsgs.init;
+	if (cmsgs.sinfo != NULL) {
+		memset(&default_sinfo, 0, sizeof(default_sinfo));
+		default_sinfo.sinfo_stream = cmsgs.sinfo->snd_sid;
+		default_sinfo.sinfo_flags = cmsgs.sinfo->snd_flags;
+		default_sinfo.sinfo_ppid = cmsgs.sinfo->snd_ppid;
+		default_sinfo.sinfo_context = cmsgs.sinfo->snd_context;
+		default_sinfo.sinfo_assoc_id = cmsgs.sinfo->snd_assoc_id;
 
-	/* Did the user specify SNDRCVINFO?  */
+		sinfo = &default_sinfo;
+		fill_sinfo_ttl = true;
+	} else {
+		sinfo = cmsgs.srinfo;
+	}
+	/* Did the user specify SNDINFO/SNDRCVINFO? */
 	if (sinfo) {
 		sinfo_flags = sinfo->sinfo_flags;
 		associd = sinfo->sinfo_assoc_id;
@@ -1858,8 +1870,8 @@ static int sctp_sendmsg(struct kiocb *iocb, struct sock *sk,
 	pr_debug("%s: we have a valid association\n", __func__);
 
 	if (!sinfo) {
-		/* If the user didn't specify SNDRCVINFO, make up one with
-		 * some defaults.
+		/* If the user didn't specify SNDINFO/SNDRCVINFO, make up
+		 * one with some defaults.
 		 */
 		memset(&default_sinfo, 0, sizeof(default_sinfo));
 		default_sinfo.sinfo_stream = asoc->default_stream;
@@ -1868,7 +1880,13 @@ static int sctp_sendmsg(struct kiocb *iocb, struct sock *sk,
 		default_sinfo.sinfo_context = asoc->default_context;
 		default_sinfo.sinfo_timetolive = asoc->default_timetolive;
 		default_sinfo.sinfo_assoc_id = sctp_assoc2id(asoc);
+
 		sinfo = &default_sinfo;
+	} else if (fill_sinfo_ttl) {
+		/* In case SNDINFO was specified, we still need to fill
+		 * it with a default ttl from the assoc here.
+		 */
+		sinfo->sinfo_timetolive = asoc->default_timetolive;
 	}
 
 	/* API 7.1.7, the sndbuf size per association bounds the
@@ -6390,8 +6408,7 @@ static int sctp_msghdr_parse(const struct msghdr *msg, sctp_cmsgs_t *cmsgs)
 	struct cmsghdr *cmsg;
 	struct msghdr *my_msg = (struct msghdr *)msg;
 
-	for (cmsg = CMSG_FIRSTHDR(msg);
-	     cmsg != NULL;
+	for (cmsg = CMSG_FIRSTHDR(msg); cmsg != NULL;
 	     cmsg = CMSG_NXTHDR(my_msg, cmsg)) {
 		if (!CMSG_OK(my_msg, cmsg))
 			return -EINVAL;
@@ -6404,7 +6421,7 @@ static int sctp_msghdr_parse(const struct msghdr *msg, sctp_cmsgs_t *cmsgs)
 		switch (cmsg->cmsg_type) {
 		case SCTP_INIT:
 			/* SCTP Socket API Extension
-			 * 5.2.1 SCTP Initiation Structure (SCTP_INIT)
+			 * 5.3.1 SCTP Initiation Structure (SCTP_INIT)
 			 *
 			 * This cmsghdr structure provides information for
 			 * initializing new SCTP associations with sendmsg().
@@ -6416,15 +6433,15 @@ static int sctp_msghdr_parse(const struct msghdr *msg, sctp_cmsgs_t *cmsgs)
 			 * ------------  ------------   ----------------------
 			 * IPPROTO_SCTP  SCTP_INIT      struct sctp_initmsg
 			 */
-			if (cmsg->cmsg_len !=
-			    CMSG_LEN(sizeof(struct sctp_initmsg)))
+			if (cmsg->cmsg_len != CMSG_LEN(sizeof(struct sctp_initmsg)))
 				return -EINVAL;
-			cmsgs->init = (struct sctp_initmsg *)CMSG_DATA(cmsg);
+
+			cmsgs->init = CMSG_DATA(cmsg);
 			break;
 
 		case SCTP_SNDRCV:
 			/* SCTP Socket API Extension
-			 * 5.2.2 SCTP Header Information Structure(SCTP_SNDRCV)
+			 * 5.3.2 SCTP Header Information Structure(SCTP_SNDRCV)
 			 *
 			 * This cmsghdr structure specifies SCTP options for
 			 * sendmsg() and describes SCTP header information
@@ -6434,24 +6451,44 @@ static int sctp_msghdr_parse(const struct msghdr *msg, sctp_cmsgs_t *cmsgs)
 			 * ------------  ------------   ----------------------
 			 * IPPROTO_SCTP  SCTP_SNDRCV    struct sctp_sndrcvinfo
 			 */
-			if (cmsg->cmsg_len !=
-			    CMSG_LEN(sizeof(struct sctp_sndrcvinfo)))
+			if (cmsg->cmsg_len != CMSG_LEN(sizeof(struct sctp_sndrcvinfo)))
 				return -EINVAL;
 
-			cmsgs->info =
-				(struct sctp_sndrcvinfo *)CMSG_DATA(cmsg);
+			cmsgs->srinfo = CMSG_DATA(cmsg);
 
-			/* Minimally, validate the sinfo_flags. */
-			if (cmsgs->info->sinfo_flags &
+			if (cmsgs->srinfo->sinfo_flags &
 			    ~(SCTP_UNORDERED | SCTP_ADDR_OVER |
 			      SCTP_ABORT | SCTP_EOF))
 				return -EINVAL;
 			break;
 
+		case SCTP_SNDINFO:
+			/* SCTP Socket API Extension
+			 * 5.3.4 SCTP Send Information Structure (SCTP_SNDINFO)
+			 *
+			 * This cmsghdr structure specifies SCTP options for
+			 * sendmsg(). This structure and SCTP_RCVINFO replaces
+			 * SCTP_SNDRCV which has been deprecated.
+			 *
+			 * cmsg_level    cmsg_type      cmsg_data[]
+			 * ------------  ------------   ---------------------
+			 * IPPROTO_SCTP  SCTP_SNDINFO    struct sctp_sndinfo
+			 */
+			if (cmsg->cmsg_len != CMSG_LEN(sizeof(struct sctp_sndinfo)))
+				return -EINVAL;
+
+			cmsgs->sinfo = CMSG_DATA(cmsg);
+
+			if (cmsgs->sinfo->snd_flags &
+			    ~(SCTP_UNORDERED | SCTP_ADDR_OVER |
+			      SCTP_ABORT | SCTP_EOF))
+				return -EINVAL;
+			break;
 		default:
 			return -EINVAL;
 		}
 	}
+
 	return 0;
 }
 
-- 
cgit v1.2.3


From 0d3a421d284812d07970b4ccee74d4fa38737e4d Mon Sep 17 00:00:00 2001
From: Geir Ola Vaagland <geirola@gmail.com>
Date: Sat, 12 Jul 2014 20:30:37 +0200
Subject: net: sctp: implement rfc6458, 5.3.5. SCTP_RCVINFO cmsg support

This patch implements section 5.3.5. of RFC6458, that is, support
for 'SCTP Receive Information Structure' (SCTP_RCVINFO) which is
placed into ancillary data cmsghdr structure for each recvmsg()
call.

This option can be enabled/disabled via setsockopt(2) on SOL_SCTP
level by setting an int value with 1/0 for SCTP_RECVRCVINFO in user
space applications as per RFC6458, section 8.1.29.

The sctp_rcvinfo structure is defined as per RFC as below ...

  struct sctp_rcvinfo {
    uint16_t rcv_sid;
    uint16_t rcv_ssn;
    uint16_t rcv_flags;
    <-- 2 bytes hole  -->
    uint32_t rcv_ppid;
    uint32_t rcv_tsn;
    uint32_t rcv_cumtsn;
    uint32_t rcv_context;
    sctp_assoc_t rcv_assoc_id;
  };

... and provided under cmsg_level IPPROTO_SCTP, cmsg_type
SCTP_RCVINFO, while cmsg_data[] contains struct sctp_rcvinfo.
An sctp_rcvinfo item always corresponds to the data in msg_iov.

Joint work with Daniel Borkmann.

Signed-off-by: Geir Ola Vaagland <geirola@gmail.com>
Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/structs.h  |  3 +++
 include/net/sctp/ulpevent.h |  5 ++++-
 include/uapi/linux/sctp.h   | 32 ++++++++++++++++++++++-------
 net/sctp/socket.c           | 49 ++++++++++++++++++++++++++++++++++++++++++++-
 net/sctp/ulpevent.c         | 25 +++++++++++++++++++++++
 5 files changed, 105 insertions(+), 9 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 7af9a0f5d8ce..11d5df015370 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -207,7 +207,9 @@ struct sctp_sock {
 	struct sctp_paddrparams paddrparam;
 	struct sctp_event_subscribe subscribe;
 	struct sctp_assocparams assocparams;
+
 	int user_frag;
+
 	__u32 autoclose;
 	__u8 nodelay;
 	__u8 disable_fragments;
@@ -215,6 +217,7 @@ struct sctp_sock {
 	__u8 frag_interleave;
 	__u32 adaptation_ind;
 	__u32 pd_point;
+	__u8 recvrcvinfo;
 
 	atomic_t pd_mode;
 	/* Receive to here while partial delivery is in effect. */
diff --git a/include/net/sctp/ulpevent.h b/include/net/sctp/ulpevent.h
index daacb32b55b5..e8095f973e94 100644
--- a/include/net/sctp/ulpevent.h
+++ b/include/net/sctp/ulpevent.h
@@ -129,7 +129,10 @@ struct sctp_ulpevent *sctp_ulpevent_make_sender_dry_event(
 	const struct sctp_association *asoc, gfp_t gfp);
 
 void sctp_ulpevent_read_sndrcvinfo(const struct sctp_ulpevent *event,
-	struct msghdr *);
+				   struct msghdr *);
+void sctp_ulpevent_read_rcvinfo(const struct sctp_ulpevent *event,
+				struct msghdr *);
+
 __u16 sctp_ulpevent_get_notification_type(const struct sctp_ulpevent *event);
 
 /* Is this event type enabled? */
diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h
index a387761f7e02..29b81bbfc53d 100644
--- a/include/uapi/linux/sctp.h
+++ b/include/uapi/linux/sctp.h
@@ -95,6 +95,7 @@ typedef __s32 sctp_assoc_t;
 #define SCTP_GET_ASSOC_ID_LIST	29	/* Read only */
 #define SCTP_AUTO_ASCONF       30
 #define SCTP_PEER_ADDR_THLDS	31
+#define SCTP_RECVRCVINFO	32
 
 /* Internal Socket Options. Some of the sctp library functions are
  * implemented using these socket options.
@@ -110,8 +111,7 @@ typedef __s32 sctp_assoc_t;
 #define SCTP_SOCKOPT_CONNECTX3	111	/* CONNECTX requests (updated) */
 #define SCTP_GET_ASSOC_STATS	112	/* Read only */
 
-/*
- * 5.2.1 SCTP Initiation Structure (SCTP_INIT)
+/* 5.3.1 SCTP Initiation Structure (SCTP_INIT)
  *
  *   This cmsghdr structure provides information for initializing new
  *   SCTP associations with sendmsg().  The SCTP_INITMSG socket option
@@ -121,7 +121,6 @@ typedef __s32 sctp_assoc_t;
  *   cmsg_level    cmsg_type      cmsg_data[]
  *   ------------  ------------   ----------------------
  *   IPPROTO_SCTP  SCTP_INIT      struct sctp_initmsg
- *
  */
 struct sctp_initmsg {
 	__u16 sinit_num_ostreams;
@@ -130,8 +129,7 @@ struct sctp_initmsg {
 	__u16 sinit_max_init_timeo;
 };
 
-/*
- * 5.2.2 SCTP Header Information Structure (SCTP_SNDRCV)
+/* 5.3.2 SCTP Header Information Structure (SCTP_SNDRCV)
  *
  *   This cmsghdr structure specifies SCTP options for sendmsg() and
  *   describes SCTP header information about a received message through
@@ -140,7 +138,6 @@ struct sctp_initmsg {
  *   cmsg_level    cmsg_type      cmsg_data[]
  *   ------------  ------------   ----------------------
  *   IPPROTO_SCTP  SCTP_SNDRCV    struct sctp_sndrcvinfo
- *
  */
 struct sctp_sndrcvinfo {
 	__u16 sinfo_stream;
@@ -170,13 +167,32 @@ struct sctp_sndinfo {
 	sctp_assoc_t snd_assoc_id;
 };
 
+/* 5.3.5 SCTP Receive Information Structure (SCTP_RCVINFO)
+ *
+ *   This cmsghdr structure describes SCTP receive information
+ *   about a received message through recvmsg().
+ *
+ *   cmsg_level    cmsg_type      cmsg_data[]
+ *   ------------  ------------   -------------------
+ *   IPPROTO_SCTP  SCTP_RCVINFO   struct sctp_rcvinfo
+ */
+struct sctp_rcvinfo {
+	__u16 rcv_sid;
+	__u16 rcv_ssn;
+	__u16 rcv_flags;
+	__u32 rcv_ppid;
+	__u32 rcv_tsn;
+	__u32 rcv_cumtsn;
+	__u32 rcv_context;
+	sctp_assoc_t rcv_assoc_id;
+};
+
 /*
  *  sinfo_flags: 16 bits (unsigned integer)
  *
  *   This field may contain any of the following flags and is composed of
  *   a bitwise OR of these values.
  */
-
 enum sctp_sinfo_flags {
 	SCTP_UNORDERED = 1,  /* Send/receive message unordered. */
 	SCTP_ADDR_OVER = 2,  /* Override the primary destination. */
@@ -199,6 +215,8 @@ typedef enum sctp_cmsg_type {
 #define SCTP_SNDRCV	SCTP_SNDRCV
 	SCTP_SNDINFO,		/* 5.3.4 SCTP Send Information Structure */
 #define SCTP_SNDINFO	SCTP_SNDINFO
+	SCTP_RCVINFO,		/* 5.3.5 SCTP Receive Information Structure */
+#define SCTP_RCVINFO	SCTP_RCVINFO
 } sctp_cmsg_t;
 
 /*
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index d61729e99856..9c193887c5cd 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -2112,9 +2112,13 @@ static int sctp_recvmsg(struct kiocb *iocb, struct sock *sk,
 		sp->pf->skb_msgname(skb, msg->msg_name, addr_len);
 	}
 
+	/* Check if we allow SCTP_RCVINFO. */
+	if (sp->recvrcvinfo)
+		sctp_ulpevent_read_rcvinfo(event, msg);
 	/* Check if we allow SCTP_SNDRCVINFO. */
 	if (sp->subscribe.sctp_data_io_event)
 		sctp_ulpevent_read_sndrcvinfo(event, msg);
+
 #if 0
 	/* FIXME: we should be calling IP/IPv6 layers.  */
 	if (sk->sk_protinfo.af_inet.cmsg_flags)
@@ -3541,7 +3545,6 @@ static int sctp_setsockopt_auto_asconf(struct sock *sk, char __user *optval,
 	return 0;
 }
 
-
 /*
  * SCTP_PEER_ADDR_THLDS
  *
@@ -3592,6 +3595,22 @@ static int sctp_setsockopt_paddr_thresholds(struct sock *sk,
 	return 0;
 }
 
+static int sctp_setsockopt_recvrcvinfo(struct sock *sk,
+				       char __user *optval,
+				       unsigned int optlen)
+{
+	int val;
+
+	if (optlen < sizeof(int))
+		return -EINVAL;
+	if (get_user(val, (int __user *) optval))
+		return -EFAULT;
+
+	sctp_sk(sk)->recvrcvinfo = (val == 0) ? 0 : 1;
+
+	return 0;
+}
+
 /* API 6.2 setsockopt(), getsockopt()
  *
  * Applications use setsockopt() and getsockopt() to set or retrieve
@@ -3743,6 +3762,9 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname,
 	case SCTP_PEER_ADDR_THLDS:
 		retval = sctp_setsockopt_paddr_thresholds(sk, optval, optlen);
 		break;
+	case SCTP_RECVRCVINFO:
+		retval = sctp_setsockopt_recvrcvinfo(sk, optval, optlen);
+		break;
 	default:
 		retval = -ENOPROTOOPT;
 		break;
@@ -3989,6 +4011,8 @@ static int sctp_init_sock(struct sock *sk)
 	/* Enable Nagle algorithm by default.  */
 	sp->nodelay           = 0;
 
+	sp->recvrcvinfo = 0;
+
 	/* Enable by default. */
 	sp->v4mapped          = 1;
 
@@ -5770,6 +5794,26 @@ static int sctp_getsockopt_assoc_stats(struct sock *sk, int len,
 	return 0;
 }
 
+static int sctp_getsockopt_recvrcvinfo(struct sock *sk,	int len,
+				       char __user *optval,
+				       int __user *optlen)
+{
+	int val = 0;
+
+	if (len < sizeof(int))
+		return -EINVAL;
+
+	len = sizeof(int);
+	if (sctp_sk(sk)->recvrcvinfo)
+		val = 1;
+	if (put_user(len, optlen))
+		return -EFAULT;
+	if (copy_to_user(optval, &val, len))
+		return -EFAULT;
+
+	return 0;
+}
+
 static int sctp_getsockopt(struct sock *sk, int level, int optname,
 			   char __user *optval, int __user *optlen)
 {
@@ -5913,6 +5957,9 @@ static int sctp_getsockopt(struct sock *sk, int level, int optname,
 	case SCTP_GET_ASSOC_STATS:
 		retval = sctp_getsockopt_assoc_stats(sk, len, optval, optlen);
 		break;
+	case SCTP_RECVRCVINFO:
+		retval = sctp_getsockopt_recvrcvinfo(sk, len, optval, optlen);
+		break;
 	default:
 		retval = -ENOPROTOOPT;
 		break;
diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c
index b6842fdb53d4..b31f365f18ab 100644
--- a/net/sctp/ulpevent.c
+++ b/net/sctp/ulpevent.c
@@ -886,6 +886,31 @@ void sctp_ulpevent_read_sndrcvinfo(const struct sctp_ulpevent *event,
 		 sizeof(sinfo), &sinfo);
 }
 
+/* RFC6458, Section 5.3.5 SCTP Receive Information Structure
+ * (SCTP_SNDRCV)
+ */
+void sctp_ulpevent_read_rcvinfo(const struct sctp_ulpevent *event,
+				struct msghdr *msghdr)
+{
+	struct sctp_rcvinfo rinfo;
+
+	if (sctp_ulpevent_is_notification(event))
+		return;
+
+	memset(&rinfo, 0, sizeof(struct sctp_rcvinfo));
+	rinfo.rcv_sid = event->stream;
+	rinfo.rcv_ssn = event->ssn;
+	rinfo.rcv_ppid = event->ppid;
+	rinfo.rcv_flags = event->flags;
+	rinfo.rcv_tsn = event->tsn;
+	rinfo.rcv_cumtsn = event->cumtsn;
+	rinfo.rcv_assoc_id = sctp_assoc2id(event->asoc);
+	rinfo.rcv_context = event->asoc->default_rcv_context;
+
+	put_cmsg(msghdr, IPPROTO_SCTP, SCTP_RCVINFO,
+		 sizeof(rinfo), &rinfo);
+}
+
 /* Do accounting for bytes received and hold a reference to the association
  * for each skb.
  */
-- 
cgit v1.2.3


From 2347c80ff127b94ddaa675e2b78ab4cef46dc905 Mon Sep 17 00:00:00 2001
From: Geir Ola Vaagland <geirola@gmail.com>
Date: Sat, 12 Jul 2014 20:30:38 +0200
Subject: net: sctp: implement rfc6458, 5.3.6. SCTP_NXTINFO cmsg support

This patch implements section 5.3.6. of RFC6458, that is, support
for 'SCTP Next Receive Information Structure' (SCTP_NXTINFO) which
is placed into ancillary data cmsghdr structure for each recvmsg()
call, if this information is already available when delivering the
current message.

This option can be enabled/disabled via setsockopt(2) on SOL_SCTP
level by setting an int value with 1/0 for SCTP_RECVNXTINFO in
user space applications as per RFC6458, section 8.1.30.

The sctp_nxtinfo structure is defined as per RFC as below ...

  struct sctp_nxtinfo {
    uint16_t nxt_sid;
    uint16_t nxt_flags;
    uint32_t nxt_ppid;
    uint32_t nxt_length;
    sctp_assoc_t nxt_assoc_id;
  };

... and provided under cmsg_level IPPROTO_SCTP, cmsg_type
SCTP_NXTINFO, while cmsg_data[] contains struct sctp_nxtinfo.

Joint work with Daniel Borkmann.

Signed-off-by: Geir Ola Vaagland <geirola@gmail.com>
Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/sctp.h     |  1 +
 include/net/sctp/structs.h  |  1 +
 include/net/sctp/ulpevent.h |  9 ++------
 include/uapi/linux/sctp.h   | 47 +++++++++++++++++++++++++++++-----------
 net/sctp/socket.c           | 52 +++++++++++++++++++++++++++++++++++++++++----
 net/sctp/ulpevent.c         | 38 +++++++++++++++++++++++++++++++++
 6 files changed, 125 insertions(+), 23 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
index c2035c96a2ee..90c1cccd164d 100644
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@ -109,6 +109,7 @@ void sctp_copy_sock(struct sock *newsk, struct sock *sk,
 		    struct sctp_association *asoc);
 extern struct percpu_counter sctp_sockets_allocated;
 int sctp_asconf_mgmt(struct sctp_sock *, struct sctp_sockaddr_entry *);
+struct sk_buff *sctp_skb_recv_datagram(struct sock *, int, int, int *);
 
 /*
  * sctp/primitive.c
diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 11d5df015370..7741d1b66967 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -218,6 +218,7 @@ struct sctp_sock {
 	__u32 adaptation_ind;
 	__u32 pd_point;
 	__u8 recvrcvinfo;
+	__u8 recvnxtinfo;
 
 	atomic_t pd_mode;
 	/* Receive to here while partial delivery is in effect. */
diff --git a/include/net/sctp/ulpevent.h b/include/net/sctp/ulpevent.h
index e8095f973e94..cccdcfd14973 100644
--- a/include/net/sctp/ulpevent.h
+++ b/include/net/sctp/ulpevent.h
@@ -132,6 +132,8 @@ void sctp_ulpevent_read_sndrcvinfo(const struct sctp_ulpevent *event,
 				   struct msghdr *);
 void sctp_ulpevent_read_rcvinfo(const struct sctp_ulpevent *event,
 				struct msghdr *);
+void sctp_ulpevent_read_nxtinfo(const struct sctp_ulpevent *event,
+				struct msghdr *, struct sock *sk);
 
 __u16 sctp_ulpevent_get_notification_type(const struct sctp_ulpevent *event);
 
@@ -158,10 +160,3 @@ static inline int sctp_ulpevent_is_enabled(const struct sctp_ulpevent *event,
 }
 
 #endif /* __sctp_ulpevent_h__ */
-
-
-
-
-
-
-
diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h
index 29b81bbfc53d..222f82ffeca4 100644
--- a/include/uapi/linux/sctp.h
+++ b/include/uapi/linux/sctp.h
@@ -96,6 +96,7 @@ typedef __s32 sctp_assoc_t;
 #define SCTP_AUTO_ASCONF       30
 #define SCTP_PEER_ADDR_THLDS	31
 #define SCTP_RECVRCVINFO	32
+#define SCTP_RECVNXTINFO	33
 
 /* Internal Socket Options. Some of the sctp library functions are
  * implemented using these socket options.
@@ -111,6 +112,13 @@ typedef __s32 sctp_assoc_t;
 #define SCTP_SOCKOPT_CONNECTX3	111	/* CONNECTX requests (updated) */
 #define SCTP_GET_ASSOC_STATS	112	/* Read only */
 
+/* These are bit fields for msghdr->msg_flags.  See section 5.1.  */
+/* On user space Linux, these live in <bits/socket.h> as an enum.  */
+enum sctp_msg_flags {
+	MSG_NOTIFICATION = 0x8000,
+#define MSG_NOTIFICATION MSG_NOTIFICATION
+};
+
 /* 5.3.1 SCTP Initiation Structure (SCTP_INIT)
  *
  *   This cmsghdr structure provides information for initializing new
@@ -187,6 +195,25 @@ struct sctp_rcvinfo {
 	sctp_assoc_t rcv_assoc_id;
 };
 
+/* 5.3.6 SCTP Next Receive Information Structure (SCTP_NXTINFO)
+ *
+ *   This cmsghdr structure describes SCTP receive information
+ *   of the next message that will be delivered through recvmsg()
+ *   if this information is already available when delivering
+ *   the current message.
+ *
+ *   cmsg_level    cmsg_type      cmsg_data[]
+ *   ------------  ------------   -------------------
+ *   IPPROTO_SCTP  SCTP_NXTINFO   struct sctp_nxtinfo
+ */
+struct sctp_nxtinfo {
+	__u16 nxt_sid;
+	__u16 nxt_flags;
+	__u32 nxt_ppid;
+	__u32 nxt_length;
+	sctp_assoc_t nxt_assoc_id;
+};
+
 /*
  *  sinfo_flags: 16 bits (unsigned integer)
  *
@@ -194,11 +221,12 @@ struct sctp_rcvinfo {
  *   a bitwise OR of these values.
  */
 enum sctp_sinfo_flags {
-	SCTP_UNORDERED = 1,  /* Send/receive message unordered. */
-	SCTP_ADDR_OVER = 2,  /* Override the primary destination. */
-	SCTP_ABORT=4,        /* Send an ABORT message to the peer. */
-	SCTP_SACK_IMMEDIATELY = 8,	/* SACK should be sent without delay */
-	SCTP_EOF=MSG_FIN,    /* Initiate graceful shutdown process. */
+	SCTP_UNORDERED		= (1 << 0), /* Send/receive message unordered. */
+	SCTP_ADDR_OVER		= (1 << 1), /* Override the primary destination. */
+	SCTP_ABORT		= (1 << 2), /* Send an ABORT message to the peer. */
+	SCTP_SACK_IMMEDIATELY	= (1 << 3), /* SACK should be sent without delay. */
+	SCTP_NOTIFICATION	= MSG_NOTIFICATION, /* Next message is not user msg but notification. */
+	SCTP_EOF		= MSG_FIN,  /* Initiate graceful shutdown process. */
 };
 
 typedef union {
@@ -217,6 +245,8 @@ typedef enum sctp_cmsg_type {
 #define SCTP_SNDINFO	SCTP_SNDINFO
 	SCTP_RCVINFO,		/* 5.3.5 SCTP Receive Information Structure */
 #define SCTP_RCVINFO	SCTP_RCVINFO
+	SCTP_NXTINFO,		/* 5.3.6 SCTP Next Receive Information Structure */
+#define SCTP_NXTINFO	SCTP_NXTINFO
 } sctp_cmsg_t;
 
 /*
@@ -844,13 +874,6 @@ struct sctp_assoc_stats {
 	__u64		sas_ictrlchunks; /* Control chunks received */
 };
 
-/* These are bit fields for msghdr->msg_flags.  See section 5.1.  */
-/* On user space Linux, these live in <bits/socket.h> as an enum.  */
-enum sctp_msg_flags {
-	MSG_NOTIFICATION = 0x8000,
-#define MSG_NOTIFICATION MSG_NOTIFICATION
-};
-
 /*
  * 8.1 sctp_bindx()
  *
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 9c193887c5cd..9bca87ee5152 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -2060,8 +2060,6 @@ static int sctp_skb_pull(struct sk_buff *skb, int len)
  *  flags   - flags sent or received with the user message, see Section
  *            5 for complete description of the flags.
  */
-static struct sk_buff *sctp_skb_recv_datagram(struct sock *, int, int, int *);
-
 static int sctp_recvmsg(struct kiocb *iocb, struct sock *sk,
 			struct msghdr *msg, size_t len, int noblock,
 			int flags, int *addr_len)
@@ -2112,6 +2110,9 @@ static int sctp_recvmsg(struct kiocb *iocb, struct sock *sk,
 		sp->pf->skb_msgname(skb, msg->msg_name, addr_len);
 	}
 
+	/* Check if we allow SCTP_NXTINFO. */
+	if (sp->recvnxtinfo)
+		sctp_ulpevent_read_nxtinfo(event, msg, sk);
 	/* Check if we allow SCTP_RCVINFO. */
 	if (sp->recvrcvinfo)
 		sctp_ulpevent_read_rcvinfo(event, msg);
@@ -3611,6 +3612,22 @@ static int sctp_setsockopt_recvrcvinfo(struct sock *sk,
 	return 0;
 }
 
+static int sctp_setsockopt_recvnxtinfo(struct sock *sk,
+				       char __user *optval,
+				       unsigned int optlen)
+{
+	int val;
+
+	if (optlen < sizeof(int))
+		return -EINVAL;
+	if (get_user(val, (int __user *) optval))
+		return -EFAULT;
+
+	sctp_sk(sk)->recvnxtinfo = (val == 0) ? 0 : 1;
+
+	return 0;
+}
+
 /* API 6.2 setsockopt(), getsockopt()
  *
  * Applications use setsockopt() and getsockopt() to set or retrieve
@@ -3765,6 +3782,9 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname,
 	case SCTP_RECVRCVINFO:
 		retval = sctp_setsockopt_recvrcvinfo(sk, optval, optlen);
 		break;
+	case SCTP_RECVNXTINFO:
+		retval = sctp_setsockopt_recvnxtinfo(sk, optval, optlen);
+		break;
 	default:
 		retval = -ENOPROTOOPT;
 		break;
@@ -4012,6 +4032,7 @@ static int sctp_init_sock(struct sock *sk)
 	sp->nodelay           = 0;
 
 	sp->recvrcvinfo = 0;
+	sp->recvnxtinfo = 0;
 
 	/* Enable by default. */
 	sp->v4mapped          = 1;
@@ -5814,6 +5835,26 @@ static int sctp_getsockopt_recvrcvinfo(struct sock *sk,	int len,
 	return 0;
 }
 
+static int sctp_getsockopt_recvnxtinfo(struct sock *sk,	int len,
+				       char __user *optval,
+				       int __user *optlen)
+{
+	int val = 0;
+
+	if (len < sizeof(int))
+		return -EINVAL;
+
+	len = sizeof(int);
+	if (sctp_sk(sk)->recvnxtinfo)
+		val = 1;
+	if (put_user(len, optlen))
+		return -EFAULT;
+	if (copy_to_user(optval, &val, len))
+		return -EFAULT;
+
+	return 0;
+}
+
 static int sctp_getsockopt(struct sock *sk, int level, int optname,
 			   char __user *optval, int __user *optlen)
 {
@@ -5960,6 +6001,9 @@ static int sctp_getsockopt(struct sock *sk, int level, int optname,
 	case SCTP_RECVRCVINFO:
 		retval = sctp_getsockopt_recvrcvinfo(sk, len, optval, optlen);
 		break;
+	case SCTP_RECVNXTINFO:
+		retval = sctp_getsockopt_recvnxtinfo(sk, len, optval, optlen);
+		break;
 	default:
 		retval = -ENOPROTOOPT;
 		break;
@@ -6602,8 +6646,8 @@ out:
  * Note: This is pretty much the same routine as in core/datagram.c
  * with a few changes to make lksctp work.
  */
-static struct sk_buff *sctp_skb_recv_datagram(struct sock *sk, int flags,
-					      int noblock, int *err)
+struct sk_buff *sctp_skb_recv_datagram(struct sock *sk, int flags,
+				       int noblock, int *err)
 {
 	int error;
 	struct sk_buff *skb;
diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c
index b31f365f18ab..e049298ecfa0 100644
--- a/net/sctp/ulpevent.c
+++ b/net/sctp/ulpevent.c
@@ -911,6 +911,44 @@ void sctp_ulpevent_read_rcvinfo(const struct sctp_ulpevent *event,
 		 sizeof(rinfo), &rinfo);
 }
 
+/* RFC6458, Section 5.3.6. SCTP Next Receive Information Structure
+ * (SCTP_NXTINFO)
+ */
+static void __sctp_ulpevent_read_nxtinfo(const struct sctp_ulpevent *event,
+					 struct msghdr *msghdr,
+					 const struct sk_buff *skb)
+{
+	struct sctp_nxtinfo nxtinfo;
+
+	memset(&nxtinfo, 0, sizeof(nxtinfo));
+	nxtinfo.nxt_sid = event->stream;
+	nxtinfo.nxt_ppid = event->ppid;
+	nxtinfo.nxt_flags = event->flags;
+	if (sctp_ulpevent_is_notification(event))
+		nxtinfo.nxt_flags |= SCTP_NOTIFICATION;
+	nxtinfo.nxt_length = skb->len;
+	nxtinfo.nxt_assoc_id = sctp_assoc2id(event->asoc);
+
+	put_cmsg(msghdr, IPPROTO_SCTP, SCTP_NXTINFO,
+		 sizeof(nxtinfo), &nxtinfo);
+}
+
+void sctp_ulpevent_read_nxtinfo(const struct sctp_ulpevent *event,
+				struct msghdr *msghdr,
+				struct sock *sk)
+{
+	struct sk_buff *skb;
+	int err;
+
+	skb = sctp_skb_recv_datagram(sk, MSG_PEEK, 1, &err);
+	if (skb != NULL) {
+		__sctp_ulpevent_read_nxtinfo(sctp_skb2event(skb),
+					     msghdr, skb);
+		/* Just release refcount here. */
+		kfree_skb(skb);
+	}
+}
+
 /* Do accounting for bytes received and hold a reference to the association
  * for each skb.
  */
-- 
cgit v1.2.3


From 6b3fd5f3a2bbc8464a8e0bf134a183b8fa026439 Mon Sep 17 00:00:00 2001
From: Geir Ola Vaagland <geirola@gmail.com>
Date: Sat, 12 Jul 2014 20:30:39 +0200
Subject: net: sctp: implement rfc6458, 8.1.31. SCTP_DEFAULT_SNDINFO support

This patch implements section 8.1.31. of RFC6458, which adds support
for setting/retrieving SCTP_DEFAULT_SNDINFO:

  Applications that wish to use the sendto() system call may wish
  to specify a default set of parameters that would normally be
  supplied through the inclusion of ancillary data. This socket
  option allows such an application to set the default sctp_sndinfo
  structure. The application that wishes to use this socket option
  simply passes the sctp_sndinfo structure (defined in Section 5.3.4)
  to this call. The input parameters accepted by this call include
  snd_sid, snd_flags, snd_ppid, and snd_context. The snd_flags
  parameter is composed of a bitwise OR of SCTP_UNORDERED, SCTP_EOF,
  and SCTP_SENDALL. The snd_assoc_id field specifies the association
  to which to apply the parameters. For a one-to-many style socket,
  any of the predefined constants are also allowed in this field.
  The field is ignored for one-to-one style sockets.

Joint work with Daniel Borkmann.

Signed-off-by: Geir Ola Vaagland <geirola@gmail.com>
Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/sctp.h |   1 +
 net/sctp/socket.c         | 107 ++++++++++++++++++++++++++++++++++++++++++----
 2 files changed, 99 insertions(+), 9 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h
index 222f82ffeca4..ce70fe6b45df 100644
--- a/include/uapi/linux/sctp.h
+++ b/include/uapi/linux/sctp.h
@@ -97,6 +97,7 @@ typedef __s32 sctp_assoc_t;
 #define SCTP_PEER_ADDR_THLDS	31
 #define SCTP_RECVRCVINFO	32
 #define SCTP_RECVNXTINFO	33
+#define SCTP_DEFAULT_SNDINFO	34
 
 /* Internal Socket Options. Some of the sctp library functions are
  * implemented using these socket options.
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 9bca87ee5152..d95a50c013c9 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -2770,19 +2770,22 @@ static int sctp_setsockopt_default_send_param(struct sock *sk,
 					      char __user *optval,
 					      unsigned int optlen)
 {
-	struct sctp_sndrcvinfo info;
-	struct sctp_association *asoc;
 	struct sctp_sock *sp = sctp_sk(sk);
+	struct sctp_association *asoc;
+	struct sctp_sndrcvinfo info;
 
-	if (optlen != sizeof(struct sctp_sndrcvinfo))
+	if (optlen != sizeof(info))
 		return -EINVAL;
 	if (copy_from_user(&info, optval, optlen))
 		return -EFAULT;
+	if (info.sinfo_flags &
+	    ~(SCTP_UNORDERED | SCTP_ADDR_OVER |
+	      SCTP_ABORT | SCTP_EOF))
+		return -EINVAL;
 
 	asoc = sctp_id2assoc(sk, info.sinfo_assoc_id);
 	if (!asoc && info.sinfo_assoc_id && sctp_style(sk, UDP))
 		return -EINVAL;
-
 	if (asoc) {
 		asoc->default_stream = info.sinfo_stream;
 		asoc->default_flags = info.sinfo_flags;
@@ -2800,6 +2803,44 @@ static int sctp_setsockopt_default_send_param(struct sock *sk,
 	return 0;
 }
 
+/* RFC6458, Section 8.1.31. Set/get Default Send Parameters
+ * (SCTP_DEFAULT_SNDINFO)
+ */
+static int sctp_setsockopt_default_sndinfo(struct sock *sk,
+					   char __user *optval,
+					   unsigned int optlen)
+{
+	struct sctp_sock *sp = sctp_sk(sk);
+	struct sctp_association *asoc;
+	struct sctp_sndinfo info;
+
+	if (optlen != sizeof(info))
+		return -EINVAL;
+	if (copy_from_user(&info, optval, optlen))
+		return -EFAULT;
+	if (info.snd_flags &
+	    ~(SCTP_UNORDERED | SCTP_ADDR_OVER |
+	      SCTP_ABORT | SCTP_EOF))
+		return -EINVAL;
+
+	asoc = sctp_id2assoc(sk, info.snd_assoc_id);
+	if (!asoc && info.snd_assoc_id && sctp_style(sk, UDP))
+		return -EINVAL;
+	if (asoc) {
+		asoc->default_stream = info.snd_sid;
+		asoc->default_flags = info.snd_flags;
+		asoc->default_ppid = info.snd_ppid;
+		asoc->default_context = info.snd_context;
+	} else {
+		sp->default_stream = info.snd_sid;
+		sp->default_flags = info.snd_flags;
+		sp->default_ppid = info.snd_ppid;
+		sp->default_context = info.snd_context;
+	}
+
+	return 0;
+}
+
 /* 7.1.10 Set Primary Address (SCTP_PRIMARY_ADDR)
  *
  * Requests that the local SCTP stack use the enclosed peer address as
@@ -3725,6 +3766,9 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname,
 		retval = sctp_setsockopt_default_send_param(sk, optval,
 							    optlen);
 		break;
+	case SCTP_DEFAULT_SNDINFO:
+		retval = sctp_setsockopt_default_sndinfo(sk, optval, optlen);
+		break;
 	case SCTP_PRIMARY_ADDR:
 		retval = sctp_setsockopt_primary_addr(sk, optval, optlen);
 		break;
@@ -5027,14 +5071,14 @@ static int sctp_getsockopt_default_send_param(struct sock *sk,
 					int len, char __user *optval,
 					int __user *optlen)
 {
-	struct sctp_sndrcvinfo info;
-	struct sctp_association *asoc;
 	struct sctp_sock *sp = sctp_sk(sk);
+	struct sctp_association *asoc;
+	struct sctp_sndrcvinfo info;
 
-	if (len < sizeof(struct sctp_sndrcvinfo))
+	if (len < sizeof(info))
 		return -EINVAL;
 
-	len = sizeof(struct sctp_sndrcvinfo);
+	len = sizeof(info);
 
 	if (copy_from_user(&info, optval, len))
 		return -EFAULT;
@@ -5042,7 +5086,6 @@ static int sctp_getsockopt_default_send_param(struct sock *sk,
 	asoc = sctp_id2assoc(sk, info.sinfo_assoc_id);
 	if (!asoc && info.sinfo_assoc_id && sctp_style(sk, UDP))
 		return -EINVAL;
-
 	if (asoc) {
 		info.sinfo_stream = asoc->default_stream;
 		info.sinfo_flags = asoc->default_flags;
@@ -5065,6 +5108,48 @@ static int sctp_getsockopt_default_send_param(struct sock *sk,
 	return 0;
 }
 
+/* RFC6458, Section 8.1.31. Set/get Default Send Parameters
+ * (SCTP_DEFAULT_SNDINFO)
+ */
+static int sctp_getsockopt_default_sndinfo(struct sock *sk, int len,
+					   char __user *optval,
+					   int __user *optlen)
+{
+	struct sctp_sock *sp = sctp_sk(sk);
+	struct sctp_association *asoc;
+	struct sctp_sndinfo info;
+
+	if (len < sizeof(info))
+		return -EINVAL;
+
+	len = sizeof(info);
+
+	if (copy_from_user(&info, optval, len))
+		return -EFAULT;
+
+	asoc = sctp_id2assoc(sk, info.snd_assoc_id);
+	if (!asoc && info.snd_assoc_id && sctp_style(sk, UDP))
+		return -EINVAL;
+	if (asoc) {
+		info.snd_sid = asoc->default_stream;
+		info.snd_flags = asoc->default_flags;
+		info.snd_ppid = asoc->default_ppid;
+		info.snd_context = asoc->default_context;
+	} else {
+		info.snd_sid = sp->default_stream;
+		info.snd_flags = sp->default_flags;
+		info.snd_ppid = sp->default_ppid;
+		info.snd_context = sp->default_context;
+	}
+
+	if (put_user(len, optlen))
+		return -EFAULT;
+	if (copy_to_user(optval, &info, len))
+		return -EFAULT;
+
+	return 0;
+}
+
 /*
  *
  * 7.1.5 SCTP_NODELAY
@@ -5924,6 +6009,10 @@ static int sctp_getsockopt(struct sock *sk, int level, int optname,
 		retval = sctp_getsockopt_default_send_param(sk, len,
 							    optval, optlen);
 		break;
+	case SCTP_DEFAULT_SNDINFO:
+		retval = sctp_getsockopt_default_sndinfo(sk, len,
+							 optval, optlen);
+		break;
 	case SCTP_PRIMARY_ADDR:
 		retval = sctp_getsockopt_primary_addr(sk, len, optval, optlen);
 		break;
-- 
cgit v1.2.3


From 7e6a68210784dcea8e39fd9d4c9966f9c733ba09 Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hans.verkuil@cisco.com>
Date: Thu, 27 Mar 2014 11:25:45 -0300
Subject: [media] videodev2.h: add initial support for compound controls

Compound controls are controls that can be used for compound and array
types. This allows for more compound data structures to be used with the
control framework.

The existing V4L2_CTRL_FLAG_NEXT_CTRL flag will only enumerate non-compound
controls, so a new V4L2_CTRL_FLAG_NEXT_COMPOUND flag is added to enumerate
compound controls. Set both flags to enumerate any control (compound or not).

Compound control types will start at V4L2_CTRL_COMPOUND_TYPES. In addition, any
control that uses the new 'ptr' field or the existing 'string' field will have
flag V4L2_CTRL_FLAG_HAS_PAYLOAD set.

While not strictly necessary, adding that flag makes life for applications
a lot simpler. If the flag is not set, then the control value is set
through the value or value64 fields of struct v4l2_ext_control, otherwise
a pointer points to the value.

Signed-off-by: Hans Verkuil <hans.verkuil@cisco.com>
Reviewed-by: Sylwester Nawrocki <s.nawrocki@samsung.com>
Signed-off-by: Mauro Carvalho Chehab <m.chehab@samsung.com>
---
 include/uapi/linux/videodev2.h | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h
index 168ff507bf75..438c4a65c5b2 100644
--- a/include/uapi/linux/videodev2.h
+++ b/include/uapi/linux/videodev2.h
@@ -1254,6 +1254,7 @@ struct v4l2_ext_control {
 		__s32 value;
 		__s64 value64;
 		char *string;
+		void *ptr;
 	};
 } __attribute__ ((packed));
 
@@ -1278,7 +1279,10 @@ enum v4l2_ctrl_type {
 	V4L2_CTRL_TYPE_CTRL_CLASS    = 6,
 	V4L2_CTRL_TYPE_STRING        = 7,
 	V4L2_CTRL_TYPE_BITMASK       = 8,
-	V4L2_CTRL_TYPE_INTEGER_MENU = 9,
+	V4L2_CTRL_TYPE_INTEGER_MENU  = 9,
+
+	/* Compound types are >= 0x0100 */
+	V4L2_CTRL_COMPOUND_TYPES     = 0x0100,
 };
 
 /*  Used in the VIDIOC_QUERYCTRL ioctl for querying controls */
@@ -1314,9 +1318,11 @@ struct v4l2_querymenu {
 #define V4L2_CTRL_FLAG_SLIDER 		0x0020
 #define V4L2_CTRL_FLAG_WRITE_ONLY 	0x0040
 #define V4L2_CTRL_FLAG_VOLATILE		0x0080
+#define V4L2_CTRL_FLAG_HAS_PAYLOAD	0x0100
 
-/*  Query flag, to be ORed with the control ID */
+/*  Query flags, to be ORed with the control ID */
 #define V4L2_CTRL_FLAG_NEXT_CTRL	0x80000000
+#define V4L2_CTRL_FLAG_NEXT_COMPOUND	0x40000000
 
 /*  User-class control IDs defined by V4L2 */
 #define V4L2_CID_MAX_CTRLS		1024
-- 
cgit v1.2.3


From 5082c2417841e64df975789011e182ce99a9dacd Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hans.verkuil@cisco.com>
Date: Tue, 10 Jun 2014 04:14:50 -0300
Subject: [media] videodev2.h: add struct v4l2_query_ext_ctrl and
 VIDIOC_QUERY_EXT_CTRL

Add a new struct and ioctl to extend the amount of information you can
get for a control.

The range is now a s64 type, and array dimensions and element size can be
reported through nr_of_dims/dims/elems/elem_size.

Signed-off-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <m.chehab@samsung.com>
---
 include/uapi/linux/videodev2.h | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h
index 438c4a65c5b2..0297980fe491 100644
--- a/include/uapi/linux/videodev2.h
+++ b/include/uapi/linux/videodev2.h
@@ -1269,6 +1269,7 @@ struct v4l2_ext_controls {
 #define V4L2_CTRL_ID_MASK      	  (0x0fffffff)
 #define V4L2_CTRL_ID2CLASS(id)    ((id) & 0x0fff0000UL)
 #define V4L2_CTRL_DRIVER_PRIV(id) (((id) & 0xffff) >= 0x1000)
+#define V4L2_CTRL_MAX_DIMS	  (4)
 
 enum v4l2_ctrl_type {
 	V4L2_CTRL_TYPE_INTEGER	     = 1,
@@ -1298,6 +1299,23 @@ struct v4l2_queryctrl {
 	__u32		     reserved[2];
 };
 
+/*  Used in the VIDIOC_QUERY_EXT_CTRL ioctl for querying extended controls */
+struct v4l2_query_ext_ctrl {
+	__u32		     id;
+	__u32		     type;
+	char		     name[32];
+	__s64		     minimum;
+	__s64		     maximum;
+	__u64		     step;
+	__s64		     default_value;
+	__u32                flags;
+	__u32                elem_size;
+	__u32                elems;
+	__u32                nr_of_dims;
+	__u32                dims[V4L2_CTRL_MAX_DIMS];
+	__u32		     reserved[32];
+};
+
 /*  Used in the VIDIOC_QUERYMENU ioctl for querying menu items */
 struct v4l2_querymenu {
 	__u32		id;
@@ -2011,6 +2029,8 @@ struct v4l2_create_buffers {
    Never use these in applications! */
 #define VIDIOC_DBG_G_CHIP_INFO  _IOWR('V', 102, struct v4l2_dbg_chip_info)
 
+#define VIDIOC_QUERY_EXT_CTRL	_IOWR('V', 103, struct v4l2_query_ext_ctrl)
+
 /* Reminder: when adding new ioctls please add support for them to
    drivers/media/video/v4l2-compat-ioctl32.c as well! */
 
-- 
cgit v1.2.3


From dda4a4d5ea245591b788b70116fb52b0d145fb33 Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hans.verkuil@cisco.com>
Date: Tue, 10 Jun 2014 07:30:04 -0300
Subject: [media] v4l2-ctrls/videodev2.h: add u8 and u16 types

These are needed by the upcoming patches for the motion detection
matrices.

Signed-off-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <m.chehab@samsung.com>
---
 drivers/media/v4l2-core/v4l2-ctrls.c | 45 ++++++++++++++++++++++++++++++++----
 include/media/v4l2-ctrls.h           |  4 ++++
 include/uapi/linux/videodev2.h       |  4 ++++
 3 files changed, 49 insertions(+), 4 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/drivers/media/v4l2-core/v4l2-ctrls.c b/drivers/media/v4l2-core/v4l2-ctrls.c
index 1086ae3f9152..adf5485e56a8 100644
--- a/drivers/media/v4l2-core/v4l2-ctrls.c
+++ b/drivers/media/v4l2-core/v4l2-ctrls.c
@@ -1174,6 +1174,10 @@ static bool std_equal(const struct v4l2_ctrl *ctrl, u32 idx,
 		return !strcmp(ptr1.p_char + idx, ptr2.p_char + idx);
 	case V4L2_CTRL_TYPE_INTEGER64:
 		return ptr1.p_s64[idx] == ptr2.p_s64[idx];
+	case V4L2_CTRL_TYPE_U8:
+		return ptr1.p_u8[idx] == ptr2.p_u8[idx];
+	case V4L2_CTRL_TYPE_U16:
+		return ptr1.p_u16[idx] == ptr2.p_u16[idx];
 	default:
 		if (ctrl->is_int)
 			return ptr1.p_s32[idx] == ptr2.p_s32[idx];
@@ -1201,6 +1205,12 @@ static void std_init(const struct v4l2_ctrl *ctrl, u32 idx,
 	case V4L2_CTRL_TYPE_BOOLEAN:
 		ptr.p_s32[idx] = ctrl->default_value;
 		break;
+	case V4L2_CTRL_TYPE_U8:
+		ptr.p_u8[idx] = ctrl->default_value;
+		break;
+	case V4L2_CTRL_TYPE_U16:
+		ptr.p_u16[idx] = ctrl->default_value;
+		break;
 	default:
 		idx *= ctrl->elem_size;
 		memset(ptr.p + idx, 0, ctrl->elem_size);
@@ -1242,6 +1252,12 @@ static void std_log(const struct v4l2_ctrl *ctrl)
 	case V4L2_CTRL_TYPE_STRING:
 		pr_cont("%s", ptr.p_char);
 		break;
+	case V4L2_CTRL_TYPE_U8:
+		pr_cont("%u", (unsigned)*ptr.p_u8);
+		break;
+	case V4L2_CTRL_TYPE_U16:
+		pr_cont("%u", (unsigned)*ptr.p_u16);
+		break;
 	default:
 		pr_cont("unknown type %d", ctrl->type);
 		break;
@@ -1272,6 +1288,10 @@ static int std_validate(const struct v4l2_ctrl *ctrl, u32 idx,
 		return ROUND_TO_RANGE(ptr.p_s32[idx], u32, ctrl);
 	case V4L2_CTRL_TYPE_INTEGER64:
 		return ROUND_TO_RANGE(ptr.p_s64[idx], u64, ctrl);
+	case V4L2_CTRL_TYPE_U8:
+		return ROUND_TO_RANGE(ptr.p_u8[idx], u8, ctrl);
+	case V4L2_CTRL_TYPE_U16:
+		return ROUND_TO_RANGE(ptr.p_u16[idx], u16, ctrl);
 
 	case V4L2_CTRL_TYPE_BOOLEAN:
 		ptr.p_s32[idx] = !!ptr.p_s32[idx];
@@ -1502,6 +1522,8 @@ static int check_range(enum v4l2_ctrl_type type,
 		if (step != 1 || max > 1 || min < 0)
 			return -ERANGE;
 		/* fall through */
+	case V4L2_CTRL_TYPE_U8:
+	case V4L2_CTRL_TYPE_U16:
 	case V4L2_CTRL_TYPE_INTEGER:
 	case V4L2_CTRL_TYPE_INTEGER64:
 		if (step == 0 || min > max || def < min || def > max)
@@ -1803,12 +1825,25 @@ static struct v4l2_ctrl *v4l2_ctrl_new(struct v4l2_ctrl_handler *hdl,
 	}
 	is_array = nr_of_dims > 0;
 
-	if (type == V4L2_CTRL_TYPE_INTEGER64)
+	/* Prefill elem_size for all types handled by std_type_ops */
+	switch (type) {
+	case V4L2_CTRL_TYPE_INTEGER64:
 		elem_size = sizeof(s64);
-	else if (type == V4L2_CTRL_TYPE_STRING)
+		break;
+	case V4L2_CTRL_TYPE_STRING:
 		elem_size = max + 1;
-	else if (type < V4L2_CTRL_COMPOUND_TYPES)
-		elem_size = sizeof(s32);
+		break;
+	case V4L2_CTRL_TYPE_U8:
+		elem_size = sizeof(u8);
+		break;
+	case V4L2_CTRL_TYPE_U16:
+		elem_size = sizeof(u16);
+		break;
+	default:
+		if (type < V4L2_CTRL_COMPOUND_TYPES)
+			elem_size = sizeof(s32);
+		break;
+	}
 	tot_ctrl_size = elem_size * elems;
 
 	/* Sanity checks */
@@ -3148,6 +3183,8 @@ int v4l2_ctrl_modify_range(struct v4l2_ctrl *ctrl,
 	case V4L2_CTRL_TYPE_MENU:
 	case V4L2_CTRL_TYPE_INTEGER_MENU:
 	case V4L2_CTRL_TYPE_BITMASK:
+	case V4L2_CTRL_TYPE_U8:
+	case V4L2_CTRL_TYPE_U16:
 		if (ctrl->is_array)
 			return -EINVAL;
 		ret = check_range(ctrl->type, min, max, step, def);
diff --git a/include/media/v4l2-ctrls.h b/include/media/v4l2-ctrls.h
index 7915b1125bb5..c630345e9fd2 100644
--- a/include/media/v4l2-ctrls.h
+++ b/include/media/v4l2-ctrls.h
@@ -39,12 +39,16 @@ struct poll_table_struct;
 /** union v4l2_ctrl_ptr - A pointer to a control value.
  * @p_s32:	Pointer to a 32-bit signed value.
  * @p_s64:	Pointer to a 64-bit signed value.
+ * @p_u8:	Pointer to a 8-bit unsigned value.
+ * @p_u16:	Pointer to a 16-bit unsigned value.
  * @p_char:	Pointer to a string.
  * @p:		Pointer to a compound value.
  */
 union v4l2_ctrl_ptr {
 	s32 *p_s32;
 	s64 *p_s64;
+	u8 *p_u8;
+	u16 *p_u16;
 	char *p_char;
 	void *p;
 };
diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h
index 0297980fe491..632de96b42d5 100644
--- a/include/uapi/linux/videodev2.h
+++ b/include/uapi/linux/videodev2.h
@@ -1254,6 +1254,8 @@ struct v4l2_ext_control {
 		__s32 value;
 		__s64 value64;
 		char *string;
+		__u8 *p_u8;
+		__u16 *p_u16;
 		void *ptr;
 	};
 } __attribute__ ((packed));
@@ -1284,6 +1286,8 @@ enum v4l2_ctrl_type {
 
 	/* Compound types are >= 0x0100 */
 	V4L2_CTRL_COMPOUND_TYPES     = 0x0100,
+	V4L2_CTRL_TYPE_U8	     = 0x0100,
+	V4L2_CTRL_TYPE_U16	     = 0x0101,
 };
 
 /*  Used in the VIDIOC_QUERYCTRL ioctl for querying controls */
-- 
cgit v1.2.3


From a77b4fc0bc328b0989f83220aba1c268e087107f Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hans.verkuil@cisco.com>
Date: Fri, 28 Mar 2014 13:00:08 -0300
Subject: [media] v4l2-ctrls/v4l2-controls.h: add MD controls

Add the 'Detect' control class and the new motion detection controls.
Those controls will be used by the solo6x10 and go7007 drivers.

Signed-off-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <m.chehab@samsung.com>
---
 drivers/media/v4l2-core/v4l2-ctrls.c | 27 +++++++++++++++++++++++++++
 include/uapi/linux/v4l2-controls.h   | 17 +++++++++++++++++
 2 files changed, 44 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/drivers/media/v4l2-core/v4l2-ctrls.c b/drivers/media/v4l2-core/v4l2-ctrls.c
index 5aaf15e047c8..5c3b8de82e35 100644
--- a/drivers/media/v4l2-core/v4l2-ctrls.c
+++ b/drivers/media/v4l2-core/v4l2-ctrls.c
@@ -462,6 +462,13 @@ const char * const *v4l2_ctrl_get_menu(u32 id)
 		"RGB full range (0-255)",
 		NULL,
 	};
+	static const char * const detect_md_mode[] = {
+		"Disabled",
+		"Global",
+		"Threshold Grid",
+		"Region Grid",
+		NULL,
+	};
 
 
 	switch (id) {
@@ -553,6 +560,8 @@ const char * const *v4l2_ctrl_get_menu(u32 id)
 	case V4L2_CID_DV_TX_RGB_RANGE:
 	case V4L2_CID_DV_RX_RGB_RANGE:
 		return dv_rgb_range;
+	case V4L2_CID_DETECT_MD_MODE:
+		return detect_md_mode;
 
 	default:
 		return NULL;
@@ -874,6 +883,15 @@ const char *v4l2_ctrl_get_name(u32 id)
 	case V4L2_CID_RF_TUNER_BANDWIDTH_AUTO:	return "Bandwidth, Auto";
 	case V4L2_CID_RF_TUNER_BANDWIDTH:	return "Bandwidth";
 	case V4L2_CID_RF_TUNER_PLL_LOCK:	return "PLL Lock";
+
+	/* Detection controls */
+	/* Keep the order of the 'case's the same as in v4l2-controls.h! */
+	case V4L2_CID_DETECT_CLASS:		return "Detection Controls";
+	case V4L2_CID_DETECT_MD_MODE:		return "Motion Detection Mode";
+	case V4L2_CID_DETECT_MD_GLOBAL_THRESHOLD: return "MD Global Threshold";
+	case V4L2_CID_DETECT_MD_THRESHOLD_GRID:	return "MD Threshold Grid";
+	case V4L2_CID_DETECT_MD_REGION_GRID:	return "MD Region Grid";
+
 	default:
 		return NULL;
 	}
@@ -992,6 +1010,7 @@ void v4l2_ctrl_fill(u32 id, const char **name, enum v4l2_ctrl_type *type,
 	case V4L2_CID_TEST_PATTERN:
 	case V4L2_CID_TUNE_DEEMPHASIS:
 	case V4L2_CID_MPEG_VIDEO_VPX_GOLDEN_FRAME_SEL:
+	case V4L2_CID_DETECT_MD_MODE:
 		*type = V4L2_CTRL_TYPE_MENU;
 		break;
 	case V4L2_CID_LINK_FREQ:
@@ -1018,6 +1037,7 @@ void v4l2_ctrl_fill(u32 id, const char **name, enum v4l2_ctrl_type *type,
 	case V4L2_CID_DV_CLASS:
 	case V4L2_CID_FM_RX_CLASS:
 	case V4L2_CID_RF_TUNER_CLASS:
+	case V4L2_CID_DETECT_CLASS:
 		*type = V4L2_CTRL_TYPE_CTRL_CLASS;
 		/* You can neither read not write these */
 		*flags |= V4L2_CTRL_FLAG_READ_ONLY | V4L2_CTRL_FLAG_WRITE_ONLY;
@@ -1063,6 +1083,12 @@ void v4l2_ctrl_fill(u32 id, const char **name, enum v4l2_ctrl_type *type,
 		*type = V4L2_CTRL_TYPE_INTEGER64;
 		*flags |= V4L2_CTRL_FLAG_READ_ONLY;
 		break;
+	case V4L2_CID_DETECT_MD_REGION_GRID:
+		*type = V4L2_CTRL_TYPE_U8;
+		break;
+	case V4L2_CID_DETECT_MD_THRESHOLD_GRID:
+		*type = V4L2_CTRL_TYPE_U16;
+		break;
 	default:
 		*type = V4L2_CTRL_TYPE_INTEGER;
 		break;
@@ -1103,6 +1129,7 @@ void v4l2_ctrl_fill(u32 id, const char **name, enum v4l2_ctrl_type *type,
 	case V4L2_CID_RF_TUNER_MIXER_GAIN:
 	case V4L2_CID_RF_TUNER_IF_GAIN:
 	case V4L2_CID_RF_TUNER_BANDWIDTH:
+	case V4L2_CID_DETECT_MD_GLOBAL_THRESHOLD:
 		*flags |= V4L2_CTRL_FLAG_SLIDER;
 		break;
 	case V4L2_CID_PAN_RELATIVE:
diff --git a/include/uapi/linux/v4l2-controls.h b/include/uapi/linux/v4l2-controls.h
index 2ac5597f3ee1..db526d1c8309 100644
--- a/include/uapi/linux/v4l2-controls.h
+++ b/include/uapi/linux/v4l2-controls.h
@@ -61,6 +61,7 @@
 #define V4L2_CTRL_CLASS_DV		0x00a00000	/* Digital Video controls */
 #define V4L2_CTRL_CLASS_FM_RX		0x00a10000	/* FM Receiver controls */
 #define V4L2_CTRL_CLASS_RF_TUNER	0x00a20000	/* RF tuner controls */
+#define V4L2_CTRL_CLASS_DETECT		0x00a30000	/* Detection controls */
 
 /* User-class control IDs */
 
@@ -914,4 +915,20 @@ enum v4l2_deemphasis {
 #define V4L2_CID_RF_TUNER_IF_GAIN		(V4L2_CID_RF_TUNER_CLASS_BASE + 62)
 #define V4L2_CID_RF_TUNER_PLL_LOCK			(V4L2_CID_RF_TUNER_CLASS_BASE + 91)
 
+
+/*  Detection-class control IDs defined by V4L2 */
+#define V4L2_CID_DETECT_CLASS_BASE		(V4L2_CTRL_CLASS_DETECT | 0x900)
+#define V4L2_CID_DETECT_CLASS			(V4L2_CTRL_CLASS_DETECT | 1)
+
+#define V4L2_CID_DETECT_MD_MODE			(V4L2_CID_DETECT_CLASS_BASE + 1)
+enum v4l2_detect_md_mode {
+	V4L2_DETECT_MD_MODE_DISABLED		= 0,
+	V4L2_DETECT_MD_MODE_GLOBAL		= 1,
+	V4L2_DETECT_MD_MODE_THRESHOLD_GRID	= 2,
+	V4L2_DETECT_MD_MODE_REGION_GRID		= 3,
+};
+#define V4L2_CID_DETECT_MD_GLOBAL_THRESHOLD	(V4L2_CID_DETECT_CLASS_BASE + 2)
+#define V4L2_CID_DETECT_MD_THRESHOLD_GRID	(V4L2_CID_DETECT_CLASS_BASE + 3)
+#define V4L2_CID_DETECT_MD_REGION_GRID		(V4L2_CID_DETECT_CLASS_BASE + 4)
+
 #endif
-- 
cgit v1.2.3


From 78ea611385f08da6e52ff2ea15f0a26e89acb3ab Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hans.verkuil@cisco.com>
Date: Mon, 27 Jan 2014 09:26:23 -0300
Subject: [media] v4l2: add a motion detection event

Add a new MOTION_DET event to signal when motion is detected.

Signed-off-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <m.chehab@samsung.com>
---
 include/uapi/linux/videodev2.h | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h
index 632de96b42d5..1477abebd35b 100644
--- a/include/uapi/linux/videodev2.h
+++ b/include/uapi/linux/videodev2.h
@@ -1799,6 +1799,7 @@ struct v4l2_streamparm {
 #define V4L2_EVENT_CTRL				3
 #define V4L2_EVENT_FRAME_SYNC			4
 #define V4L2_EVENT_SOURCE_CHANGE		5
+#define V4L2_EVENT_MOTION_DET			6
 #define V4L2_EVENT_PRIVATE_START		0x08000000
 
 /* Payload for V4L2_EVENT_VSYNC */
@@ -1836,6 +1837,21 @@ struct v4l2_event_src_change {
 	__u32 changes;
 };
 
+#define V4L2_EVENT_MD_FL_HAVE_FRAME_SEQ	(1 << 0)
+
+/**
+ * struct v4l2_event_motion_det - motion detection event
+ * @flags:             if V4L2_EVENT_MD_FL_HAVE_FRAME_SEQ is set, then the
+ *                     frame_sequence field is valid.
+ * @frame_sequence:    the frame sequence number associated with this event.
+ * @region_mask:       which regions detected motion.
+ */
+struct v4l2_event_motion_det {
+	__u32 flags;
+	__u32 frame_sequence;
+	__u32 region_mask;
+};
+
 struct v4l2_event {
 	__u32				type;
 	union {
@@ -1843,6 +1859,7 @@ struct v4l2_event {
 		struct v4l2_event_ctrl		ctrl;
 		struct v4l2_event_frame_sync	frame_sync;
 		struct v4l2_event_src_change	src_change;
+		struct v4l2_event_motion_det	motion_det;
 		__u8				data[64];
 	} u;
 	__u32				pending;
-- 
cgit v1.2.3


From 977ff0e4fb3460df9f3dd27de92d60786be28645 Mon Sep 17 00:00:00 2001
From: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Date: Wed, 21 May 2014 22:07:55 -0300
Subject: [media] v4l: Add ARGB and XRGB pixel formats

The existing RGB pixel formats are ill-defined in respect to their alpha
bits and their meaning is driver dependent. Create new standard ARGB and
XRGB variants with clearly defined meanings and make the existing
variants deprecated.

The new pixel formats 4CC values have been selected to match the DRM
4CCs for the same in-memory formats.

Signed-off-by: Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
Acked-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <m.chehab@samsung.com>
---
 .../DocBook/media/v4l/pixfmt-packed-rgb.xml        | 415 ++++++++++++++++++++-
 include/uapi/linux/videodev2.h                     |   8 +
 2 files changed, 403 insertions(+), 20 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/Documentation/DocBook/media/v4l/pixfmt-packed-rgb.xml b/Documentation/DocBook/media/v4l/pixfmt-packed-rgb.xml
index e1c4f8b4c0b3..5f1602fe5494 100644
--- a/Documentation/DocBook/media/v4l/pixfmt-packed-rgb.xml
+++ b/Documentation/DocBook/media/v4l/pixfmt-packed-rgb.xml
@@ -130,9 +130,9 @@ colorspace <constant>V4L2_COLORSPACE_SRGB</constant>.</para>
 	    <entry>b<subscript>1</subscript></entry>
 	    <entry>b<subscript>0</subscript></entry>
 	  </row>
-	  <row id="V4L2-PIX-FMT-RGB444">
-	    <entry><constant>V4L2_PIX_FMT_RGB444</constant></entry>
-	    <entry>'R444'</entry>
+	  <row id="V4L2-PIX-FMT-ARGB444">
+	    <entry><constant>V4L2_PIX_FMT_ARGB444</constant></entry>
+	    <entry>'AR12'</entry>
 	    <entry></entry>
 	    <entry>g<subscript>3</subscript></entry>
 	    <entry>g<subscript>2</subscript></entry>
@@ -152,9 +152,31 @@ colorspace <constant>V4L2_COLORSPACE_SRGB</constant>.</para>
 	    <entry>r<subscript>1</subscript></entry>
 	    <entry>r<subscript>0</subscript></entry>
 	  </row>
-	  <row id="V4L2-PIX-FMT-RGB555">
-	    <entry><constant>V4L2_PIX_FMT_RGB555</constant></entry>
-	    <entry>'RGBO'</entry>
+	  <row id="V4L2-PIX-FMT-XRGB444">
+	    <entry><constant>V4L2_PIX_FMT_XRGB444</constant></entry>
+	    <entry>'XR12'</entry>
+	    <entry></entry>
+	    <entry>g<subscript>3</subscript></entry>
+	    <entry>g<subscript>2</subscript></entry>
+	    <entry>g<subscript>1</subscript></entry>
+	    <entry>g<subscript>0</subscript></entry>
+	    <entry>b<subscript>3</subscript></entry>
+	    <entry>b<subscript>2</subscript></entry>
+	    <entry>b<subscript>1</subscript></entry>
+	    <entry>b<subscript>0</subscript></entry>
+	    <entry></entry>
+	    <entry>-</entry>
+	    <entry>-</entry>
+	    <entry>-</entry>
+	    <entry>-</entry>
+	    <entry>r<subscript>3</subscript></entry>
+	    <entry>r<subscript>2</subscript></entry>
+	    <entry>r<subscript>1</subscript></entry>
+	    <entry>r<subscript>0</subscript></entry>
+	  </row>
+	  <row id="V4L2-PIX-FMT-ARGB555">
+	    <entry><constant>V4L2_PIX_FMT_ARGB555</constant></entry>
+	    <entry>'AR15'</entry>
 	    <entry></entry>
 	    <entry>g<subscript>2</subscript></entry>
 	    <entry>g<subscript>1</subscript></entry>
@@ -174,6 +196,28 @@ colorspace <constant>V4L2_COLORSPACE_SRGB</constant>.</para>
 	    <entry>g<subscript>4</subscript></entry>
 	    <entry>g<subscript>3</subscript></entry>
 	  </row>
+	  <row id="V4L2-PIX-FMT-XRGB555">
+	    <entry><constant>V4L2_PIX_FMT_XRGB555</constant></entry>
+	    <entry>'XR15'</entry>
+	    <entry></entry>
+	    <entry>g<subscript>2</subscript></entry>
+	    <entry>g<subscript>1</subscript></entry>
+	    <entry>g<subscript>0</subscript></entry>
+	    <entry>b<subscript>4</subscript></entry>
+	    <entry>b<subscript>3</subscript></entry>
+	    <entry>b<subscript>2</subscript></entry>
+	    <entry>b<subscript>1</subscript></entry>
+	    <entry>b<subscript>0</subscript></entry>
+	    <entry></entry>
+	    <entry>-</entry>
+	    <entry>r<subscript>4</subscript></entry>
+	    <entry>r<subscript>3</subscript></entry>
+	    <entry>r<subscript>2</subscript></entry>
+	    <entry>r<subscript>1</subscript></entry>
+	    <entry>r<subscript>0</subscript></entry>
+	    <entry>g<subscript>4</subscript></entry>
+	    <entry>g<subscript>3</subscript></entry>
+	  </row>
 	  <row id="V4L2-PIX-FMT-RGB565">
 	    <entry><constant>V4L2_PIX_FMT_RGB565</constant></entry>
 	    <entry>'RGBP'</entry>
@@ -341,9 +385,9 @@ colorspace <constant>V4L2_COLORSPACE_SRGB</constant>.</para>
 	    <entry>b<subscript>1</subscript></entry>
 	    <entry>b<subscript>0</subscript></entry>
 	  </row>
-	  <row id="V4L2-PIX-FMT-BGR32">
-	    <entry><constant>V4L2_PIX_FMT_BGR32</constant></entry>
-	    <entry>'BGR4'</entry>
+	  <row id="V4L2-PIX-FMT-ABGR32">
+	    <entry><constant>V4L2_PIX_FMT_ABGR32</constant></entry>
+	    <entry>'AR24'</entry>
 	    <entry></entry>
 	    <entry>b<subscript>7</subscript></entry>
 	    <entry>b<subscript>6</subscript></entry>
@@ -381,9 +425,49 @@ colorspace <constant>V4L2_COLORSPACE_SRGB</constant>.</para>
 	    <entry>a<subscript>1</subscript></entry>
 	    <entry>a<subscript>0</subscript></entry>
 	  </row>
-	  <row id="V4L2-PIX-FMT-RGB32">
-	    <entry><constant>V4L2_PIX_FMT_RGB32</constant></entry>
-	    <entry>'RGB4'</entry>
+	  <row id="V4L2-PIX-FMT-XBGR32">
+	    <entry><constant>V4L2_PIX_FMT_XBGR32</constant></entry>
+	    <entry>'XR24'</entry>
+	    <entry></entry>
+	    <entry>b<subscript>7</subscript></entry>
+	    <entry>b<subscript>6</subscript></entry>
+	    <entry>b<subscript>5</subscript></entry>
+	    <entry>b<subscript>4</subscript></entry>
+	    <entry>b<subscript>3</subscript></entry>
+	    <entry>b<subscript>2</subscript></entry>
+	    <entry>b<subscript>1</subscript></entry>
+	    <entry>b<subscript>0</subscript></entry>
+	    <entry></entry>
+	    <entry>g<subscript>7</subscript></entry>
+	    <entry>g<subscript>6</subscript></entry>
+	    <entry>g<subscript>5</subscript></entry>
+	    <entry>g<subscript>4</subscript></entry>
+	    <entry>g<subscript>3</subscript></entry>
+	    <entry>g<subscript>2</subscript></entry>
+	    <entry>g<subscript>1</subscript></entry>
+	    <entry>g<subscript>0</subscript></entry>
+	    <entry></entry>
+	    <entry>r<subscript>7</subscript></entry>
+	    <entry>r<subscript>6</subscript></entry>
+	    <entry>r<subscript>5</subscript></entry>
+	    <entry>r<subscript>4</subscript></entry>
+	    <entry>r<subscript>3</subscript></entry>
+	    <entry>r<subscript>2</subscript></entry>
+	    <entry>r<subscript>1</subscript></entry>
+	    <entry>r<subscript>0</subscript></entry>
+	    <entry></entry>
+	    <entry>-</entry>
+	    <entry>-</entry>
+	    <entry>-</entry>
+	    <entry>-</entry>
+	    <entry>-</entry>
+	    <entry>-</entry>
+	    <entry>-</entry>
+	    <entry>-</entry>
+	  </row>
+	  <row id="V4L2-PIX-FMT-ARGB32">
+	    <entry><constant>V4L2_PIX_FMT_ARGB32</constant></entry>
+	    <entry>'AX24'</entry>
 	    <entry></entry>
 	    <entry>a<subscript>7</subscript></entry>
 	    <entry>a<subscript>6</subscript></entry>
@@ -421,18 +505,76 @@ colorspace <constant>V4L2_COLORSPACE_SRGB</constant>.</para>
 	    <entry>b<subscript>1</subscript></entry>
 	    <entry>b<subscript>0</subscript></entry>
 	  </row>
+	  <row id="V4L2-PIX-FMT-XRGB32">
+	    <entry><constant>V4L2_PIX_FMT_XRGB32</constant></entry>
+	    <entry>'BX24'</entry>
+	    <entry></entry>
+	    <entry>-</entry>
+	    <entry>-</entry>
+	    <entry>-</entry>
+	    <entry>-</entry>
+	    <entry>-</entry>
+	    <entry>-</entry>
+	    <entry>-</entry>
+	    <entry>-</entry>
+	    <entry></entry>
+	    <entry>r<subscript>7</subscript></entry>
+	    <entry>r<subscript>6</subscript></entry>
+	    <entry>r<subscript>5</subscript></entry>
+	    <entry>r<subscript>4</subscript></entry>
+	    <entry>r<subscript>3</subscript></entry>
+	    <entry>r<subscript>2</subscript></entry>
+	    <entry>r<subscript>1</subscript></entry>
+	    <entry>r<subscript>0</subscript></entry>
+	    <entry></entry>
+	    <entry>g<subscript>7</subscript></entry>
+	    <entry>g<subscript>6</subscript></entry>
+	    <entry>g<subscript>5</subscript></entry>
+	    <entry>g<subscript>4</subscript></entry>
+	    <entry>g<subscript>3</subscript></entry>
+	    <entry>g<subscript>2</subscript></entry>
+	    <entry>g<subscript>1</subscript></entry>
+	    <entry>g<subscript>0</subscript></entry>
+	    <entry></entry>
+	    <entry>b<subscript>7</subscript></entry>
+	    <entry>b<subscript>6</subscript></entry>
+	    <entry>b<subscript>5</subscript></entry>
+	    <entry>b<subscript>4</subscript></entry>
+	    <entry>b<subscript>3</subscript></entry>
+	    <entry>b<subscript>2</subscript></entry>
+	    <entry>b<subscript>1</subscript></entry>
+	    <entry>b<subscript>0</subscript></entry>
+	  </row>
 	</tbody>
       </tgroup>
     </table>
 
-    <para>Bit 7 is the most significant bit. The value of the a = alpha
-bits is undefined when reading from the driver, ignored when writing
-to the driver, except when alpha blending has been negotiated for a
-<link linkend="overlay">Video Overlay</link> or <link linkend="osd">
-Video Output Overlay</link> or when the alpha component has been configured
-for a <link linkend="capture">Video Capture</link> by means of <link
-linkend="v4l2-alpha-component"> <constant>V4L2_CID_ALPHA_COMPONENT
-</constant> </link> control.</para>
+    <para>Bit 7 is the most significant bit.</para>
+
+    <para>The usage and value of the alpha bits (a) in the ARGB and ABGR formats
+    (collectively referred to as alpha formats) depend on the device type and
+    hardware operation. <link linkend="capture">Capture</link> devices
+    (including capture queues of mem-to-mem devices) fill the alpha component in
+    memory. When the device outputs an alpha channel the alpha component will
+    have a meaningful value. Otherwise, when the device doesn't output an alpha
+    channel but can set the alpha bit to a user-configurable value, the <link
+    linkend="v4l2-alpha-component"><constant>V4L2_CID_ALPHA_COMPONENT</constant>
+    </link> control is used to specify that alpha value, and the alpha component
+    of all pixels will be set to the value specified by that control. Otherwise
+    a corresponding format without an alpha component (XRGB or XBGR) must be
+    used instead of an alpha format.</para>
+
+    <para><link linkend="output">Output</link> devices (including output queues
+    of mem-to-mem devices and <link linkend="osd">video output overlay</link>
+    devices) read the alpha component from memory. When the device processes the
+    alpha channel the alpha component must be filled with meaningful values by
+    applications. Otherwise a corresponding format without an alpha component
+    (XRGB or XBGR) must be used instead of an alpha format.</para>
+
+    <para>The XRGB and XBGR formats contain undefined bits (-). Applications,
+    devices and drivers must ignore those bits, for both <link
+    linkend="capture">capture</link> and <link linkend="output">output</link>
+    devices.</para>
 
     <example>
       <title><constant>V4L2_PIX_FMT_BGR24</constant> 4 &times; 4 pixel
@@ -512,6 +654,239 @@ image</title>
       </formalpara>
     </example>
 
+    <para>Formats defined in <xref linkend="rgb-formats-deprecated"/> are
+    deprecated and must not be used by new drivers. They are documented here for
+    reference. The meaning of their alpha bits (a) is ill-defined and
+    interpreted as in either the corresponding ARGB or XRGB format, depending on
+    the driver.</para>
+
+    <table pgwide="1" frame="none" id="rgb-formats-deprecated">
+      <title>Deprecated Packed RGB Image Formats</title>
+      <tgroup cols="37" align="center">
+	<colspec colname="id" align="left" />
+	<colspec colname="fourcc" />
+	<colspec colname="bit" />
+
+	<colspec colnum="4" colname="b07" align="center" />
+	<colspec colnum="5" colname="b06" align="center" />
+	<colspec colnum="6" colname="b05" align="center" />
+	<colspec colnum="7" colname="b04" align="center" />
+	<colspec colnum="8" colname="b03" align="center" />
+	<colspec colnum="9" colname="b02" align="center" />
+	<colspec colnum="10" colname="b01" align="center" />
+	<colspec colnum="11" colname="b00" align="center" />
+
+	<colspec colnum="13" colname="b17" align="center" />
+	<colspec colnum="14" colname="b16" align="center" />
+	<colspec colnum="15" colname="b15" align="center" />
+	<colspec colnum="16" colname="b14" align="center" />
+	<colspec colnum="17" colname="b13" align="center" />
+	<colspec colnum="18" colname="b12" align="center" />
+	<colspec colnum="19" colname="b11" align="center" />
+	<colspec colnum="20" colname="b10" align="center" />
+
+	<colspec colnum="22" colname="b27" align="center" />
+	<colspec colnum="23" colname="b26" align="center" />
+	<colspec colnum="24" colname="b25" align="center" />
+	<colspec colnum="25" colname="b24" align="center" />
+	<colspec colnum="26" colname="b23" align="center" />
+	<colspec colnum="27" colname="b22" align="center" />
+	<colspec colnum="28" colname="b21" align="center" />
+	<colspec colnum="29" colname="b20" align="center" />
+
+	<colspec colnum="31" colname="b37" align="center" />
+	<colspec colnum="32" colname="b36" align="center" />
+	<colspec colnum="33" colname="b35" align="center" />
+	<colspec colnum="34" colname="b34" align="center" />
+	<colspec colnum="35" colname="b33" align="center" />
+	<colspec colnum="36" colname="b32" align="center" />
+	<colspec colnum="37" colname="b31" align="center" />
+	<colspec colnum="38" colname="b30" align="center" />
+
+	<spanspec namest="b07" nameend="b00" spanname="b0" />
+	<spanspec namest="b17" nameend="b10" spanname="b1" />
+	<spanspec namest="b27" nameend="b20" spanname="b2" />
+	<spanspec namest="b37" nameend="b30" spanname="b3" />
+	<thead>
+	  <row>
+	    <entry>Identifier</entry>
+	    <entry>Code</entry>
+	    <entry>&nbsp;</entry>
+	    <entry spanname="b0">Byte&nbsp;0 in memory</entry>
+	    <entry spanname="b1">Byte&nbsp;1</entry>
+	    <entry spanname="b2">Byte&nbsp;2</entry>
+	    <entry spanname="b3">Byte&nbsp;3</entry>
+	  </row>
+	  <row>
+	    <entry>&nbsp;</entry>
+	    <entry>&nbsp;</entry>
+	    <entry>Bit</entry>
+	    <entry>7</entry>
+	    <entry>6</entry>
+	    <entry>5</entry>
+	    <entry>4</entry>
+	    <entry>3</entry>
+	    <entry>2</entry>
+	    <entry>1</entry>
+	    <entry>0</entry>
+	    <entry>&nbsp;</entry>
+	    <entry>7</entry>
+	    <entry>6</entry>
+	    <entry>5</entry>
+	    <entry>4</entry>
+	    <entry>3</entry>
+	    <entry>2</entry>
+	    <entry>1</entry>
+	    <entry>0</entry>
+	    <entry>&nbsp;</entry>
+	    <entry>7</entry>
+	    <entry>6</entry>
+	    <entry>5</entry>
+	    <entry>4</entry>
+	    <entry>3</entry>
+	    <entry>2</entry>
+	    <entry>1</entry>
+	    <entry>0</entry>
+	    <entry>&nbsp;</entry>
+	    <entry>7</entry>
+	    <entry>6</entry>
+	    <entry>5</entry>
+	    <entry>4</entry>
+	    <entry>3</entry>
+	    <entry>2</entry>
+	    <entry>1</entry>
+	    <entry>0</entry>
+	  </row>
+	</thead>
+	<tbody>
+	  <row id="V4L2-PIX-FMT-RGB444">
+	    <entry><constant>V4L2_PIX_FMT_RGB444</constant></entry>
+	    <entry>'R444'</entry>
+	    <entry></entry>
+	    <entry>g<subscript>3</subscript></entry>
+	    <entry>g<subscript>2</subscript></entry>
+	    <entry>g<subscript>1</subscript></entry>
+	    <entry>g<subscript>0</subscript></entry>
+	    <entry>b<subscript>3</subscript></entry>
+	    <entry>b<subscript>2</subscript></entry>
+	    <entry>b<subscript>1</subscript></entry>
+	    <entry>b<subscript>0</subscript></entry>
+	    <entry></entry>
+	    <entry>a<subscript>3</subscript></entry>
+	    <entry>a<subscript>2</subscript></entry>
+	    <entry>a<subscript>1</subscript></entry>
+	    <entry>a<subscript>0</subscript></entry>
+	    <entry>r<subscript>3</subscript></entry>
+	    <entry>r<subscript>2</subscript></entry>
+	    <entry>r<subscript>1</subscript></entry>
+	    <entry>r<subscript>0</subscript></entry>
+	  </row>
+	  <row id="V4L2-PIX-FMT-RGB555">
+	    <entry><constant>V4L2_PIX_FMT_RGB555</constant></entry>
+	    <entry>'RGBO'</entry>
+	    <entry></entry>
+	    <entry>g<subscript>2</subscript></entry>
+	    <entry>g<subscript>1</subscript></entry>
+	    <entry>g<subscript>0</subscript></entry>
+	    <entry>b<subscript>4</subscript></entry>
+	    <entry>b<subscript>3</subscript></entry>
+	    <entry>b<subscript>2</subscript></entry>
+	    <entry>b<subscript>1</subscript></entry>
+	    <entry>b<subscript>0</subscript></entry>
+	    <entry></entry>
+	    <entry>a</entry>
+	    <entry>r<subscript>4</subscript></entry>
+	    <entry>r<subscript>3</subscript></entry>
+	    <entry>r<subscript>2</subscript></entry>
+	    <entry>r<subscript>1</subscript></entry>
+	    <entry>r<subscript>0</subscript></entry>
+	    <entry>g<subscript>4</subscript></entry>
+	    <entry>g<subscript>3</subscript></entry>
+	  </row>
+	  <row id="V4L2-PIX-FMT-BGR32">
+	    <entry><constant>V4L2_PIX_FMT_BGR32</constant></entry>
+	    <entry>'BGR4'</entry>
+	    <entry></entry>
+	    <entry>b<subscript>7</subscript></entry>
+	    <entry>b<subscript>6</subscript></entry>
+	    <entry>b<subscript>5</subscript></entry>
+	    <entry>b<subscript>4</subscript></entry>
+	    <entry>b<subscript>3</subscript></entry>
+	    <entry>b<subscript>2</subscript></entry>
+	    <entry>b<subscript>1</subscript></entry>
+	    <entry>b<subscript>0</subscript></entry>
+	    <entry></entry>
+	    <entry>g<subscript>7</subscript></entry>
+	    <entry>g<subscript>6</subscript></entry>
+	    <entry>g<subscript>5</subscript></entry>
+	    <entry>g<subscript>4</subscript></entry>
+	    <entry>g<subscript>3</subscript></entry>
+	    <entry>g<subscript>2</subscript></entry>
+	    <entry>g<subscript>1</subscript></entry>
+	    <entry>g<subscript>0</subscript></entry>
+	    <entry></entry>
+	    <entry>r<subscript>7</subscript></entry>
+	    <entry>r<subscript>6</subscript></entry>
+	    <entry>r<subscript>5</subscript></entry>
+	    <entry>r<subscript>4</subscript></entry>
+	    <entry>r<subscript>3</subscript></entry>
+	    <entry>r<subscript>2</subscript></entry>
+	    <entry>r<subscript>1</subscript></entry>
+	    <entry>r<subscript>0</subscript></entry>
+	    <entry></entry>
+	    <entry>a<subscript>7</subscript></entry>
+	    <entry>a<subscript>6</subscript></entry>
+	    <entry>a<subscript>5</subscript></entry>
+	    <entry>a<subscript>4</subscript></entry>
+	    <entry>a<subscript>3</subscript></entry>
+	    <entry>a<subscript>2</subscript></entry>
+	    <entry>a<subscript>1</subscript></entry>
+	    <entry>a<subscript>0</subscript></entry>
+	  </row>
+	  <row id="V4L2-PIX-FMT-RGB32">
+	    <entry><constant>V4L2_PIX_FMT_RGB32</constant></entry>
+	    <entry>'RGB4'</entry>
+	    <entry></entry>
+	    <entry>a<subscript>7</subscript></entry>
+	    <entry>a<subscript>6</subscript></entry>
+	    <entry>a<subscript>5</subscript></entry>
+	    <entry>a<subscript>4</subscript></entry>
+	    <entry>a<subscript>3</subscript></entry>
+	    <entry>a<subscript>2</subscript></entry>
+	    <entry>a<subscript>1</subscript></entry>
+	    <entry>a<subscript>0</subscript></entry>
+	    <entry></entry>
+	    <entry>r<subscript>7</subscript></entry>
+	    <entry>r<subscript>6</subscript></entry>
+	    <entry>r<subscript>5</subscript></entry>
+	    <entry>r<subscript>4</subscript></entry>
+	    <entry>r<subscript>3</subscript></entry>
+	    <entry>r<subscript>2</subscript></entry>
+	    <entry>r<subscript>1</subscript></entry>
+	    <entry>r<subscript>0</subscript></entry>
+	    <entry></entry>
+	    <entry>g<subscript>7</subscript></entry>
+	    <entry>g<subscript>6</subscript></entry>
+	    <entry>g<subscript>5</subscript></entry>
+	    <entry>g<subscript>4</subscript></entry>
+	    <entry>g<subscript>3</subscript></entry>
+	    <entry>g<subscript>2</subscript></entry>
+	    <entry>g<subscript>1</subscript></entry>
+	    <entry>g<subscript>0</subscript></entry>
+	    <entry></entry>
+	    <entry>b<subscript>7</subscript></entry>
+	    <entry>b<subscript>6</subscript></entry>
+	    <entry>b<subscript>5</subscript></entry>
+	    <entry>b<subscript>4</subscript></entry>
+	    <entry>b<subscript>3</subscript></entry>
+	    <entry>b<subscript>2</subscript></entry>
+	    <entry>b<subscript>1</subscript></entry>
+	    <entry>b<subscript>0</subscript></entry>
+	  </row>
+	</tbody>
+      </tgroup>
+    </table>
+
     <para>A test utility to determine which RGB formats a driver
 actually supports is available from the LinuxTV v4l-dvb repository.
 See &v4l-dvb; for access instructions.</para>
diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h
index 1477abebd35b..a498d8b58679 100644
--- a/include/uapi/linux/videodev2.h
+++ b/include/uapi/linux/videodev2.h
@@ -294,7 +294,11 @@ struct v4l2_pix_format {
 /* RGB formats */
 #define V4L2_PIX_FMT_RGB332  v4l2_fourcc('R', 'G', 'B', '1') /*  8  RGB-3-3-2     */
 #define V4L2_PIX_FMT_RGB444  v4l2_fourcc('R', '4', '4', '4') /* 16  xxxxrrrr ggggbbbb */
+#define V4L2_PIX_FMT_ARGB444 v4l2_fourcc('A', 'R', '1', '2') /* 16  aaaarrrr ggggbbbb */
+#define V4L2_PIX_FMT_XRGB444 v4l2_fourcc('X', 'R', '1', '2') /* 16  xxxxrrrr ggggbbbb */
 #define V4L2_PIX_FMT_RGB555  v4l2_fourcc('R', 'G', 'B', 'O') /* 16  RGB-5-5-5     */
+#define V4L2_PIX_FMT_ARGB555 v4l2_fourcc('A', 'R', '1', '5') /* 16  ARGB-1-5-5-5  */
+#define V4L2_PIX_FMT_XRGB555 v4l2_fourcc('X', 'R', '1', '5') /* 16  XRGB-1-5-5-5  */
 #define V4L2_PIX_FMT_RGB565  v4l2_fourcc('R', 'G', 'B', 'P') /* 16  RGB-5-6-5     */
 #define V4L2_PIX_FMT_RGB555X v4l2_fourcc('R', 'G', 'B', 'Q') /* 16  RGB-5-5-5 BE  */
 #define V4L2_PIX_FMT_RGB565X v4l2_fourcc('R', 'G', 'B', 'R') /* 16  RGB-5-6-5 BE  */
@@ -302,7 +306,11 @@ struct v4l2_pix_format {
 #define V4L2_PIX_FMT_BGR24   v4l2_fourcc('B', 'G', 'R', '3') /* 24  BGR-8-8-8     */
 #define V4L2_PIX_FMT_RGB24   v4l2_fourcc('R', 'G', 'B', '3') /* 24  RGB-8-8-8     */
 #define V4L2_PIX_FMT_BGR32   v4l2_fourcc('B', 'G', 'R', '4') /* 32  BGR-8-8-8-8   */
+#define V4L2_PIX_FMT_ABGR32  v4l2_fourcc('A', 'R', '2', '4') /* 32  BGRA-8-8-8-8  */
+#define V4L2_PIX_FMT_XBGR32  v4l2_fourcc('X', 'R', '2', '4') /* 32  BGRX-8-8-8-8  */
 #define V4L2_PIX_FMT_RGB32   v4l2_fourcc('R', 'G', 'B', '4') /* 32  RGB-8-8-8-8   */
+#define V4L2_PIX_FMT_ARGB32  v4l2_fourcc('B', 'A', '2', '4') /* 32  ARGB-8-8-8-8  */
+#define V4L2_PIX_FMT_XRGB32  v4l2_fourcc('B', 'X', '2', '4') /* 32  XRGB-8-8-8-8  */
 
 /* Grey formats */
 #define V4L2_PIX_FMT_GREY    v4l2_fourcc('G', 'R', 'E', 'Y') /*  8  Greyscale     */
-- 
cgit v1.2.3


From d52e23813672c3c72f92e7b39c7408d4b9a40a96 Mon Sep 17 00:00:00 2001
From: Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
Date: Tue, 27 May 2014 09:41:05 -0300
Subject: [media] v4l: Support extending the v4l2_pix_format structure

The v4l2_pix_format structure has no reserved field. It is embedded in
the v4l2_framebuffer structure which has no reserved fields either, and
in the v4l2_format structure which has reserved fields that were not
previously required to be zeroed out by applications.

To allow extending v4l2_pix_format, inline it in the v4l2_framebuffer
structure, and use the priv field as a magic value to indicate that the
application has set all v4l2_pix_format extended fields and zeroed all
reserved fields following the v4l2_pix_format field in the v4l2_format
structure.

The availability of this API extension is reported to userspace through
the new V4L2_CAP_EXT_PIX_FORMAT capability flag. Just checking that the
priv field is still set to the magic value at [GS]_FMT return wouldn't
be enough, as older kernels don't zero the priv field on return.

To simplify the internal API towards drivers zero the extended fields
and set the priv field to the magic value for applications not aware of
the extensions.

Signed-off-by: Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
Signed-off-by: Mauro Carvalho Chehab <m.chehab@samsung.com>
---
 Documentation/DocBook/media/Makefile               |  2 +-
 Documentation/DocBook/media/v4l/pixfmt.xml         | 25 ++++++++-
 Documentation/DocBook/media/v4l/v4l2.xml           |  8 +++
 Documentation/DocBook/media/v4l/vidioc-g-fbuf.xml  | 12 ++--
 .../DocBook/media/v4l/vidioc-querycap.xml          |  6 ++
 drivers/media/parport/bw-qcam.c                    |  2 -
 drivers/media/pci/cx18/cx18-ioctl.c                |  1 -
 drivers/media/pci/cx25821/cx25821-video.c          |  3 -
 drivers/media/pci/ivtv/ivtv-ioctl.c                |  3 -
 drivers/media/pci/meye/meye.c                      |  2 -
 drivers/media/pci/saa7134/saa7134-empress.c        |  3 -
 drivers/media/pci/saa7134/saa7134-video.c          |  2 -
 drivers/media/pci/sta2x11/sta2x11_vip.c            |  1 -
 drivers/media/platform/coda.c                      |  2 -
 drivers/media/platform/davinci/vpif_display.c      |  1 -
 drivers/media/platform/mem2mem_testdev.c           |  1 -
 drivers/media/platform/omap/omap_vout.c            |  2 -
 drivers/media/platform/sh_veu.c                    |  2 -
 drivers/media/platform/vino.c                      |  5 --
 drivers/media/platform/vivi.c                      |  1 -
 drivers/media/usb/cx231xx/cx231xx-417.c            |  2 -
 drivers/media/usb/cx231xx/cx231xx-video.c          |  2 -
 drivers/media/usb/gspca/gspca.c                    |  8 +--
 drivers/media/usb/hdpvr/hdpvr-video.c              |  1 -
 drivers/media/usb/stkwebcam/stk-webcam.c           |  2 -
 drivers/media/usb/tlg2300/pd-video.c               |  1 -
 drivers/media/usb/tm6000/tm6000-video.c            |  2 -
 drivers/media/usb/zr364xx/zr364xx.c                |  3 -
 drivers/media/v4l2-core/v4l2-compat-ioctl32.c      | 19 +++++--
 drivers/media/v4l2-core/v4l2-ioctl.c               | 65 ++++++++++++++++++++--
 include/uapi/linux/videodev2.h                     | 15 ++++-
 31 files changed, 134 insertions(+), 70 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/Documentation/DocBook/media/Makefile b/Documentation/DocBook/media/Makefile
index 639e74857968..df2962d9e11e 100644
--- a/Documentation/DocBook/media/Makefile
+++ b/Documentation/DocBook/media/Makefile
@@ -174,7 +174,7 @@ FILENAME = \
 DOCUMENTED = \
 	-e "s/\(enum *\)v4l2_mpeg_cx2341x_video_\([a-z]*_spatial_filter_type\)/\1<link linkend=\"\2\">v4l2_mpeg_cx2341x_video_\2<\/link>/g" \
 	-e "s/\(\(enum\|struct\) *\)\(v4l2_[a-zA-Z0-9_]*\)/\1<link linkend=\"\3\">\3<\/link>/g" \
-	-e "s/\(V4L2_PIX_FMT_[A-Z0-9_]\+\) /<link linkend=\"\1\">\1<\/link> /g" \
+	-e "s/\(V4L2_PIX_FMT_[A-Z0-9_]\+\)\(\s\+v4l2_fourcc\)/<link linkend=\"\1\">\1<\/link>\2/g" \
 	-e ":a;s/\(linkend=\".*\)_\(.*\">\)/\1-\2/;ta" \
 	-e "s/v4l2\-mpeg\-vbi\-ITV0/v4l2-mpeg-vbi-itv0-1/g"
 
diff --git a/Documentation/DocBook/media/v4l/pixfmt.xml b/Documentation/DocBook/media/v4l/pixfmt.xml
index 91dcbc84f3f8..bb36b3829cf9 100644
--- a/Documentation/DocBook/media/v4l/pixfmt.xml
+++ b/Documentation/DocBook/media/v4l/pixfmt.xml
@@ -112,9 +112,28 @@ see <xref linkend="colorspaces" />.</entry>
 	<row>
 	  <entry>__u32</entry>
 	  <entry><structfield>priv</structfield></entry>
-	  <entry>Reserved for custom (driver defined) additional
-information about formats. When not used drivers and applications must
-set this field to zero.</entry>
+	  <entry><para>This field indicates whether the remaining fields of the
+<structname>v4l2_pix_format</structname> structure, also called the extended
+fields, are valid. When set to <constant>V4L2_PIX_FMT_PRIV_MAGIC</constant>, it
+indicates that the extended fields have been correctly initialized. When set to
+any other value it indicates that the extended fields contain undefined values.
+</para>
+<para>Applications that wish to use the pixel format extended fields must first
+ensure that the feature is supported by querying the device for the
+<link linkend="querycap"><constant>V4L2_CAP_EXT_PIX_FORMAT</constant></link>
+capability. If the capability isn't set the pixel format extended fields are not
+supported and using the extended fields will lead to undefined results.</para>
+<para>To use the extended fields, applications must set the
+<structfield>priv</structfield> field to
+<constant>V4L2_PIX_FMT_PRIV_MAGIC</constant>, initialize all the extended fields
+and zero the unused bytes of the <structname>v4l2_format</structname>
+<structfield>raw_data</structfield> field.</para>
+<para>When the <structfield>priv</structfield> field isn't set to
+<constant>V4L2_PIX_FMT_PRIV_MAGIC</constant> drivers must act as if all the
+extended fields were set to zero. On return drivers must set the
+<structfield>priv</structfield> field to
+<constant>V4L2_PIX_FMT_PRIV_MAGIC</constant> and all the extended fields to
+applicable values.</para></entry>
 	</row>
       </tbody>
     </tgroup>
diff --git a/Documentation/DocBook/media/v4l/v4l2.xml b/Documentation/DocBook/media/v4l/v4l2.xml
index b445161b912c..d0a48bebfa52 100644
--- a/Documentation/DocBook/media/v4l/v4l2.xml
+++ b/Documentation/DocBook/media/v4l/v4l2.xml
@@ -151,6 +151,14 @@ structs, ioctls) must be noted in more detail in the history chapter
 (compat.xml), along with the possible impact on existing drivers and
 applications. -->
 
+      <revision>
+	<revnumber>3.16</revnumber>
+	<date>2014-05-27</date>
+	<authorinitials>lp</authorinitials>
+	<revremark>Extended &v4l2-pix-format;.
+	</revremark>
+      </revision>
+
       <revision>
 	<revnumber>3.15</revnumber>
 	<date>2014-02-03</date>
diff --git a/Documentation/DocBook/media/v4l/vidioc-g-fbuf.xml b/Documentation/DocBook/media/v4l/vidioc-g-fbuf.xml
index 7c63815e7afd..20460730b02c 100644
--- a/Documentation/DocBook/media/v4l/vidioc-g-fbuf.xml
+++ b/Documentation/DocBook/media/v4l/vidioc-g-fbuf.xml
@@ -152,13 +152,10 @@ a valid base address, so applications can find the corresponding Linux
 framebuffer device (see <xref linkend="osd" />).</entry>
 	  </row>
 	  <row>
-	    <entry>&v4l2-pix-format;</entry>
+	    <entry>struct</entry>
 	    <entry><structfield>fmt</structfield></entry>
 	    <entry></entry>
-	    <entry>Layout of the frame buffer. The
-<structname>v4l2_pix_format</structname> structure is defined in <xref
-linkend="pixfmt" />, for clarification the fields and acceptable values
-	    are listed below:</entry>
+	    <entry>Layout of the frame buffer.</entry>
 	  </row>
 	  <row>
 	    <entry></entry>
@@ -276,9 +273,8 @@ see <xref linkend="colorspaces" />.</entry>
 	    <entry></entry>
 	    <entry>__u32</entry>
 	    <entry><structfield>priv</structfield></entry>
-	    <entry>Reserved for additional information about custom
-(driver defined) formats. When not used drivers and applications must
-set this field to zero.</entry>
+	    <entry>Reserved. Drivers and applications must set this field to
+zero.</entry>
 	  </row>
 	</tbody>
       </tgroup>
diff --git a/Documentation/DocBook/media/v4l/vidioc-querycap.xml b/Documentation/DocBook/media/v4l/vidioc-querycap.xml
index 370d49d6fb64..d0c5e604f014 100644
--- a/Documentation/DocBook/media/v4l/vidioc-querycap.xml
+++ b/Documentation/DocBook/media/v4l/vidioc-querycap.xml
@@ -300,6 +300,12 @@ modulator programming see
 	    <entry>0x00100000</entry>
 	    <entry>The device supports the
 <link linkend="sdr">SDR Capture</link> interface.</entry>
+	  </row>
+	  <row>
+	    <entry><constant>V4L2_CAP_EXT_PIX_FORMAT</constant></entry>
+	    <entry>0x00200000</entry>
+	    <entry>The device supports the &v4l2-pix-format; extended
+fields.</entry>
 	  </row>
 	  <row>
 	    <entry><constant>V4L2_CAP_READWRITE</constant></entry>
diff --git a/drivers/media/parport/bw-qcam.c b/drivers/media/parport/bw-qcam.c
index 3c5dac4c1eab..67b9da1dc43f 100644
--- a/drivers/media/parport/bw-qcam.c
+++ b/drivers/media/parport/bw-qcam.c
@@ -759,7 +759,6 @@ static int qcam_g_fmt_vid_cap(struct file *file, void *fh, struct v4l2_format *f
 	pix->sizeimage = pix->width * pix->height;
 	/* Just a guess */
 	pix->colorspace = V4L2_COLORSPACE_SRGB;
-	pix->priv = 0;
 	return 0;
 }
 
@@ -785,7 +784,6 @@ static int qcam_try_fmt_vid_cap(struct file *file, void *fh, struct v4l2_format
 	pix->sizeimage = pix->width * pix->height;
 	/* Just a guess */
 	pix->colorspace = V4L2_COLORSPACE_SRGB;
-	pix->priv = 0;
 	return 0;
 }
 
diff --git a/drivers/media/pci/cx18/cx18-ioctl.c b/drivers/media/pci/cx18/cx18-ioctl.c
index fefb2cd35838..6f2b59042b73 100644
--- a/drivers/media/pci/cx18/cx18-ioctl.c
+++ b/drivers/media/pci/cx18/cx18-ioctl.c
@@ -156,7 +156,6 @@ static int cx18_g_fmt_vid_cap(struct file *file, void *fh,
 	pixfmt->height = cx->cxhdl.height;
 	pixfmt->colorspace = V4L2_COLORSPACE_SMPTE170M;
 	pixfmt->field = V4L2_FIELD_INTERLACED;
-	pixfmt->priv = 0;
 	if (id->type == CX18_ENC_STREAM_TYPE_YUV) {
 		pixfmt->pixelformat = s->pixelformat;
 		pixfmt->sizeimage = s->vb_bytes_per_frame;
diff --git a/drivers/media/pci/cx25821/cx25821-video.c b/drivers/media/pci/cx25821/cx25821-video.c
index 8d2f1abeef77..3a419f134584 100644
--- a/drivers/media/pci/cx25821/cx25821-video.c
+++ b/drivers/media/pci/cx25821/cx25821-video.c
@@ -576,7 +576,6 @@ static int cx25821_vidioc_g_fmt_vid_cap(struct file *file, void *priv,
 	f->fmt.pix.bytesperline = (chan->width * chan->fmt->depth) >> 3;
 	f->fmt.pix.sizeimage = chan->height * f->fmt.pix.bytesperline;
 	f->fmt.pix.colorspace = V4L2_COLORSPACE_SMPTE170M;
-	f->fmt.pix.priv = 0;
 
 	return 0;
 }
@@ -615,7 +614,6 @@ static int cx25821_vidioc_try_fmt_vid_cap(struct file *file, void *priv,
 	f->fmt.pix.bytesperline = (f->fmt.pix.width * fmt->depth) >> 3;
 	f->fmt.pix.sizeimage = f->fmt.pix.height * f->fmt.pix.bytesperline;
 	f->fmt.pix.colorspace = V4L2_COLORSPACE_SMPTE170M;
-	f->fmt.pix.priv = 0;
 
 	return 0;
 }
@@ -867,7 +865,6 @@ static int cx25821_vidioc_try_fmt_vid_out(struct file *file, void *priv,
 	f->fmt.pix.bytesperline = (f->fmt.pix.width * fmt->depth) >> 3;
 	f->fmt.pix.sizeimage = f->fmt.pix.height * f->fmt.pix.bytesperline;
 	f->fmt.pix.colorspace = V4L2_COLORSPACE_SMPTE170M;
-	f->fmt.pix.priv = 0;
 	return 0;
 }
 
diff --git a/drivers/media/pci/ivtv/ivtv-ioctl.c b/drivers/media/pci/ivtv/ivtv-ioctl.c
index b3667a00db3a..3e0cb77d5930 100644
--- a/drivers/media/pci/ivtv/ivtv-ioctl.c
+++ b/drivers/media/pci/ivtv/ivtv-ioctl.c
@@ -351,7 +351,6 @@ static int ivtv_g_fmt_vid_cap(struct file *file, void *fh, struct v4l2_format *f
 	pixfmt->height = itv->cxhdl.height;
 	pixfmt->colorspace = V4L2_COLORSPACE_SMPTE170M;
 	pixfmt->field = V4L2_FIELD_INTERLACED;
-	pixfmt->priv = 0;
 	if (id->type == IVTV_ENC_STREAM_TYPE_YUV) {
 		pixfmt->pixelformat = V4L2_PIX_FMT_HM12;
 		/* YUV size is (Y=(h*720) + UV=(h*(720/2))) */
@@ -418,7 +417,6 @@ static int ivtv_g_fmt_vid_out(struct file *file, void *fh, struct v4l2_format *f
 	pixfmt->height = itv->main_rect.height;
 	pixfmt->colorspace = V4L2_COLORSPACE_SMPTE170M;
 	pixfmt->field = V4L2_FIELD_INTERLACED;
-	pixfmt->priv = 0;
 	if (id->type == IVTV_DEC_STREAM_TYPE_YUV) {
 		switch (itv->yuv_info.lace_mode & IVTV_YUV_MODE_MASK) {
 		case IVTV_YUV_MODE_INTERLACED:
@@ -1384,7 +1382,6 @@ static int ivtv_g_fbuf(struct file *file, void *fh, struct v4l2_framebuffer *fb)
 	fb->fmt.bytesperline = fb->fmt.width;
 	fb->fmt.colorspace = V4L2_COLORSPACE_SMPTE170M;
 	fb->fmt.field = V4L2_FIELD_INTERLACED;
-	fb->fmt.priv = 0;
 	if (fb->fmt.pixelformat != V4L2_PIX_FMT_PAL8)
 		fb->fmt.bytesperline *= 2;
 	if (fb->fmt.pixelformat == V4L2_PIX_FMT_RGB32 ||
diff --git a/drivers/media/pci/meye/meye.c b/drivers/media/pci/meye/meye.c
index 1a77f8dfafa5..aeae54708811 100644
--- a/drivers/media/pci/meye/meye.c
+++ b/drivers/media/pci/meye/meye.c
@@ -1166,7 +1166,6 @@ static int vidioc_try_fmt_vid_cap(struct file *file, void *fh,
 	f->fmt.pix.sizeimage = f->fmt.pix.height *
 			       f->fmt.pix.bytesperline;
 	f->fmt.pix.colorspace = 0;
-	f->fmt.pix.priv = 0;
 
 	return 0;
 }
@@ -1232,7 +1231,6 @@ static int vidioc_s_fmt_vid_cap(struct file *file, void *fh,
 	f->fmt.pix.sizeimage = f->fmt.pix.height *
 			       f->fmt.pix.bytesperline;
 	f->fmt.pix.colorspace = 0;
-	f->fmt.pix.priv = 0;
 
 	return 0;
 }
diff --git a/drivers/media/pci/saa7134/saa7134-empress.c b/drivers/media/pci/saa7134/saa7134-empress.c
index 5526ed5444fb..ef39261c1f20 100644
--- a/drivers/media/pci/saa7134/saa7134-empress.c
+++ b/drivers/media/pci/saa7134/saa7134-empress.c
@@ -130,7 +130,6 @@ static int empress_g_fmt_vid_cap(struct file *file, void *priv,
 	f->fmt.pix.pixelformat  = V4L2_PIX_FMT_MPEG;
 	f->fmt.pix.sizeimage    = TS_PACKET_SIZE * dev->ts.nr_packets;
 	f->fmt.pix.bytesperline = 0;
-	f->fmt.pix.priv = 0;
 
 	return 0;
 }
@@ -148,7 +147,6 @@ static int empress_s_fmt_vid_cap(struct file *file, void *priv,
 	f->fmt.pix.pixelformat  = V4L2_PIX_FMT_MPEG;
 	f->fmt.pix.sizeimage    = TS_PACKET_SIZE * dev->ts.nr_packets;
 	f->fmt.pix.bytesperline = 0;
-	f->fmt.pix.priv = 0;
 
 	return 0;
 }
@@ -166,7 +164,6 @@ static int empress_try_fmt_vid_cap(struct file *file, void *priv,
 	f->fmt.pix.pixelformat  = V4L2_PIX_FMT_MPEG;
 	f->fmt.pix.sizeimage    = TS_PACKET_SIZE * dev->ts.nr_packets;
 	f->fmt.pix.bytesperline = 0;
-	f->fmt.pix.priv = 0;
 
 	return 0;
 }
diff --git a/drivers/media/pci/saa7134/saa7134-video.c b/drivers/media/pci/saa7134/saa7134-video.c
index d37599980768..0cfa2ca6a32a 100644
--- a/drivers/media/pci/saa7134/saa7134-video.c
+++ b/drivers/media/pci/saa7134/saa7134-video.c
@@ -1235,7 +1235,6 @@ static int saa7134_g_fmt_vid_cap(struct file *file, void *priv,
 	f->fmt.pix.sizeimage =
 		f->fmt.pix.height * f->fmt.pix.bytesperline;
 	f->fmt.pix.colorspace   = V4L2_COLORSPACE_SMPTE170M;
-	f->fmt.pix.priv = 0;
 	return 0;
 }
 
@@ -1315,7 +1314,6 @@ static int saa7134_try_fmt_vid_cap(struct file *file, void *priv,
 	f->fmt.pix.sizeimage =
 		f->fmt.pix.height * f->fmt.pix.bytesperline;
 	f->fmt.pix.colorspace   = V4L2_COLORSPACE_SMPTE170M;
-	f->fmt.pix.priv = 0;
 
 	return 0;
 }
diff --git a/drivers/media/pci/sta2x11/sta2x11_vip.c b/drivers/media/pci/sta2x11/sta2x11_vip.c
index f2d8c70d35f5..365bd21301ba 100644
--- a/drivers/media/pci/sta2x11/sta2x11_vip.c
+++ b/drivers/media/pci/sta2x11/sta2x11_vip.c
@@ -640,7 +640,6 @@ static int vidioc_try_fmt_vid_cap(struct file *file, void *priv,
 	f->fmt.pix.bytesperline = f->fmt.pix.width * 2;
 	f->fmt.pix.sizeimage = f->fmt.pix.width * 2 * f->fmt.pix.height;
 	f->fmt.pix.colorspace = V4L2_COLORSPACE_SMPTE170M;
-	f->fmt.pix.priv = 0;
 	return 0;
 }
 
diff --git a/drivers/media/platform/coda.c b/drivers/media/platform/coda.c
index b1783791d426..54886606ed68 100644
--- a/drivers/media/platform/coda.c
+++ b/drivers/media/platform/coda.c
@@ -613,8 +613,6 @@ static int coda_try_fmt(struct coda_ctx *ctx, struct coda_codec *codec,
 		BUG();
 	}
 
-	f->fmt.pix.priv = 0;
-
 	return 0;
 }
 
diff --git a/drivers/media/platform/davinci/vpif_display.c b/drivers/media/platform/davinci/vpif_display.c
index 877b46e8b2e4..0bd6dcb13cbc 100644
--- a/drivers/media/platform/davinci/vpif_display.c
+++ b/drivers/media/platform/davinci/vpif_display.c
@@ -648,7 +648,6 @@ static int vpif_try_fmt_vid_out(struct file *file, void *priv,
 	pixfmt->width = common->fmt.fmt.pix.width;
 	pixfmt->height = common->fmt.fmt.pix.height;
 	pixfmt->sizeimage = pixfmt->bytesperline * pixfmt->height * 2;
-	pixfmt->priv = 0;
 
 	return 0;
 }
diff --git a/drivers/media/platform/mem2mem_testdev.c b/drivers/media/platform/mem2mem_testdev.c
index 0714070ed7fa..c1b03cfd6ded 100644
--- a/drivers/media/platform/mem2mem_testdev.c
+++ b/drivers/media/platform/mem2mem_testdev.c
@@ -532,7 +532,6 @@ static int vidioc_try_fmt(struct v4l2_format *f, struct m2mtest_fmt *fmt)
 	f->fmt.pix.bytesperline = (f->fmt.pix.width * fmt->depth) >> 3;
 	f->fmt.pix.sizeimage = f->fmt.pix.height * f->fmt.pix.bytesperline;
 	f->fmt.pix.field = V4L2_FIELD_NONE;
-	f->fmt.pix.priv = 0;
 
 	return 0;
 }
diff --git a/drivers/media/platform/omap/omap_vout.c b/drivers/media/platform/omap/omap_vout.c
index 9a726eacb29b..2d177fa58471 100644
--- a/drivers/media/platform/omap/omap_vout.c
+++ b/drivers/media/platform/omap/omap_vout.c
@@ -165,7 +165,6 @@ static int omap_vout_try_format(struct v4l2_pix_format *pix)
 
 	pix->pixelformat = omap_formats[ifmt].pixelformat;
 	pix->field = V4L2_FIELD_ANY;
-	pix->priv = 0;
 
 	switch (pix->pixelformat) {
 	case V4L2_PIX_FMT_YUYV:
@@ -1896,7 +1895,6 @@ static int __init omap_vout_setup_video_data(struct omap_vout_device *vout)
 	pix->field = V4L2_FIELD_ANY;
 	pix->bytesperline = pix->width * 2;
 	pix->sizeimage = pix->bytesperline * pix->height;
-	pix->priv = 0;
 	pix->colorspace = V4L2_COLORSPACE_JPEG;
 
 	vout->bpp = RGB565_BPP;
diff --git a/drivers/media/platform/sh_veu.c b/drivers/media/platform/sh_veu.c
index 744e43b480bc..8dc279d4d561 100644
--- a/drivers/media/platform/sh_veu.c
+++ b/drivers/media/platform/sh_veu.c
@@ -425,7 +425,6 @@ static int sh_veu_g_fmt(struct sh_veu_file *veu_file, struct v4l2_format *f)
 	pix->bytesperline	= vfmt->bytesperline;
 	pix->sizeimage		= vfmt->bytesperline * pix->height *
 		vfmt->fmt->depth / vfmt->fmt->ydepth;
-	pix->priv		= 0;
 	dev_dbg(veu->dev, "%s(): type: %d, size %u @ %ux%u, fmt %x\n", __func__,
 		f->type, pix->sizeimage, pix->width, pix->height, pix->pixelformat);
 
@@ -473,7 +472,6 @@ static int sh_veu_try_fmt(struct v4l2_format *f, const struct sh_veu_format *fmt
 
 	pix->pixelformat	= fmt->fourcc;
 	pix->colorspace		= sh_veu_4cc2cspace(pix->pixelformat);
-	pix->priv		= 0;
 
 	pr_debug("%s(): type: %d, size %u\n", __func__, f->type, pix->sizeimage);
 
diff --git a/drivers/media/platform/vino.c b/drivers/media/platform/vino.c
index 470d35336119..91d44ea16f27 100644
--- a/drivers/media/platform/vino.c
+++ b/drivers/media/platform/vino.c
@@ -3147,7 +3147,6 @@ static int vino_try_fmt_vid_cap(struct file *file, void *__fh,
 	pf->colorspace =
 		vino_data_formats[tempvcs.data_format].colorspace;
 
-	pf->priv = 0;
 	return 0;
 }
 
@@ -3175,8 +3174,6 @@ static int vino_g_fmt_vid_cap(struct file *file, void *__fh,
 	pf->colorspace =
 		vino_data_formats[vcs->data_format].colorspace;
 
-	pf->priv = 0;
-
 	spin_unlock_irqrestore(&vino_drvdata->input_lock, flags);
 	return 0;
 }
@@ -3219,8 +3216,6 @@ static int vino_s_fmt_vid_cap(struct file *file, void *__fh,
 	pf->colorspace =
 		vino_data_formats[vcs->data_format].colorspace;
 
-	pf->priv = 0;
-
 	spin_unlock_irqrestore(&vino_drvdata->input_lock, flags);
 	return 0;
 }
diff --git a/drivers/media/platform/vivi.c b/drivers/media/platform/vivi.c
index 7542b5dd9910..80333714ffa7 100644
--- a/drivers/media/platform/vivi.c
+++ b/drivers/media/platform/vivi.c
@@ -1014,7 +1014,6 @@ static int vidioc_try_fmt_vid_cap(struct file *file, void *priv,
 		f->fmt.pix.colorspace = V4L2_COLORSPACE_SMPTE170M;
 	else
 		f->fmt.pix.colorspace = V4L2_COLORSPACE_SRGB;
-	f->fmt.pix.priv = 0;
 	return 0;
 }
 
diff --git a/drivers/media/usb/cx231xx/cx231xx-417.c b/drivers/media/usb/cx231xx/cx231xx-417.c
index f0400e260eb7..459bb0e98971 100644
--- a/drivers/media/usb/cx231xx/cx231xx-417.c
+++ b/drivers/media/usb/cx231xx/cx231xx-417.c
@@ -1563,7 +1563,6 @@ static int vidioc_g_fmt_vid_cap(struct file *file, void *priv,
 	f->fmt.pix.width = dev->ts1.width;
 	f->fmt.pix.height = dev->ts1.height;
 	f->fmt.pix.field = V4L2_FIELD_INTERLACED;
-	f->fmt.pix.priv = 0;
 	dprintk(1, "VIDIOC_G_FMT: w: %d, h: %d\n",
 		dev->ts1.width, dev->ts1.height);
 	dprintk(3, "exit vidioc_g_fmt_vid_cap()\n");
@@ -1582,7 +1581,6 @@ static int vidioc_try_fmt_vid_cap(struct file *file, void *priv,
 	f->fmt.pix.sizeimage = mpeglines * mpeglinesize;
 	f->fmt.pix.field = V4L2_FIELD_INTERLACED;
 	f->fmt.pix.colorspace = V4L2_COLORSPACE_SMPTE170M;
-	f->fmt.pix.priv = 0;
 	dprintk(1, "VIDIOC_TRY_FMT: w: %d, h: %d\n",
 		dev->ts1.width, dev->ts1.height);
 	dprintk(3, "exit vidioc_try_fmt_vid_cap()\n");
diff --git a/drivers/media/usb/cx231xx/cx231xx-video.c b/drivers/media/usb/cx231xx/cx231xx-video.c
index ae31ca2fc9a1..3b3ada6562ca 100644
--- a/drivers/media/usb/cx231xx/cx231xx-video.c
+++ b/drivers/media/usb/cx231xx/cx231xx-video.c
@@ -885,7 +885,6 @@ static int vidioc_g_fmt_vid_cap(struct file *file, void *priv,
 	f->fmt.pix.colorspace = V4L2_COLORSPACE_SMPTE170M;
 
 	f->fmt.pix.field = V4L2_FIELD_INTERLACED;
-	f->fmt.pix.priv = 0;
 
 	return 0;
 }
@@ -930,7 +929,6 @@ static int vidioc_try_fmt_vid_cap(struct file *file, void *priv,
 	f->fmt.pix.sizeimage = f->fmt.pix.bytesperline * height;
 	f->fmt.pix.colorspace = V4L2_COLORSPACE_SMPTE170M;
 	f->fmt.pix.field = V4L2_FIELD_INTERLACED;
-	f->fmt.pix.priv = 0;
 
 	return 0;
 }
diff --git a/drivers/media/usb/gspca/gspca.c b/drivers/media/usb/gspca/gspca.c
index 42d223239206..e8cf23c91cef 100644
--- a/drivers/media/usb/gspca/gspca.c
+++ b/drivers/media/usb/gspca/gspca.c
@@ -1109,8 +1109,8 @@ static int vidioc_g_fmt_vid_cap(struct file *file, void *priv,
 	struct gspca_dev *gspca_dev = video_drvdata(file);
 
 	fmt->fmt.pix = gspca_dev->pixfmt;
-	/* some drivers use priv internally, zero it before giving it to
-	   userspace */
+	/* some drivers use priv internally, zero it before giving it back to
+	   the core */
 	fmt->fmt.pix.priv = 0;
 	return 0;
 }
@@ -1146,8 +1146,8 @@ static int try_fmt_vid_cap(struct gspca_dev *gspca_dev,
 		fmt->fmt.pix.height = h;
 		gspca_dev->sd_desc->try_fmt(gspca_dev, fmt);
 	}
-	/* some drivers use priv internally, zero it before giving it to
-	   userspace */
+	/* some drivers use priv internally, zero it before giving it back to
+	   the core */
 	fmt->fmt.pix.priv = 0;
 	return mode;			/* used when s_fmt */
 }
diff --git a/drivers/media/usb/hdpvr/hdpvr-video.c b/drivers/media/usb/hdpvr/hdpvr-video.c
index dca4b65053aa..4b7653fd4b0b 100644
--- a/drivers/media/usb/hdpvr/hdpvr-video.c
+++ b/drivers/media/usb/hdpvr/hdpvr-video.c
@@ -1022,7 +1022,6 @@ static int vidioc_g_fmt_vid_cap(struct file *file, void *_fh,
 	f->fmt.pix.pixelformat	= V4L2_PIX_FMT_MPEG;
 	f->fmt.pix.sizeimage	= dev->bulk_in_size;
 	f->fmt.pix.bytesperline	= 0;
-	f->fmt.pix.priv		= 0;
 	if (f->fmt.pix.width == 720) {
 		/* SDTV formats */
 		f->fmt.pix.colorspace = V4L2_COLORSPACE_SMPTE170M;
diff --git a/drivers/media/usb/stkwebcam/stk-webcam.c b/drivers/media/usb/stkwebcam/stk-webcam.c
index d76860b6a0b8..3588dc38db87 100644
--- a/drivers/media/usb/stkwebcam/stk-webcam.c
+++ b/drivers/media/usb/stkwebcam/stk-webcam.c
@@ -923,7 +923,6 @@ static int stk_vidioc_g_fmt_vid_cap(struct file *filp,
 		pix_format->bytesperline = 2 * pix_format->width;
 	pix_format->sizeimage = pix_format->bytesperline
 				* pix_format->height;
-	pix_format->priv = 0;
 	return 0;
 }
 
@@ -967,7 +966,6 @@ static int stk_try_fmt_vid_cap(struct file *filp,
 		fmtd->fmt.pix.bytesperline = 2 * fmtd->fmt.pix.width;
 	fmtd->fmt.pix.sizeimage = fmtd->fmt.pix.bytesperline
 		* fmtd->fmt.pix.height;
-	fmtd->fmt.pix.priv = 0;
 	return 0;
 }
 
diff --git a/drivers/media/usb/tlg2300/pd-video.c b/drivers/media/usb/tlg2300/pd-video.c
index 8df668d06552..8cd7f02fcf9f 100644
--- a/drivers/media/usb/tlg2300/pd-video.c
+++ b/drivers/media/usb/tlg2300/pd-video.c
@@ -1321,7 +1321,6 @@ static void init_video_context(struct running_context *context)
 				.bytesperline	= 720 * 2,
 				.sizeimage	= 720 * 576 * 2,
 				.colorspace	= V4L2_COLORSPACE_SMPTE170M,
-				.priv		= 0
 			};
 }
 
diff --git a/drivers/media/usb/tm6000/tm6000-video.c b/drivers/media/usb/tm6000/tm6000-video.c
index 9bde0642ffd1..793577fc4633 100644
--- a/drivers/media/usb/tm6000/tm6000-video.c
+++ b/drivers/media/usb/tm6000/tm6000-video.c
@@ -918,7 +918,6 @@ static int vidioc_g_fmt_vid_cap(struct file *file, void *priv,
 		(f->fmt.pix.width * fh->fmt->depth) >> 3;
 	f->fmt.pix.sizeimage =
 		f->fmt.pix.height * f->fmt.pix.bytesperline;
-	f->fmt.pix.priv = 0;
 
 	return 0;
 }
@@ -959,7 +958,6 @@ static int vidioc_try_fmt_vid_cap(struct file *file, void *priv,
 	f->fmt.pix.width &= ~0x01;
 
 	f->fmt.pix.field = field;
-	f->fmt.pix.priv = 0;
 
 	f->fmt.pix.bytesperline =
 		(f->fmt.pix.width * fmt->depth) >> 3;
diff --git a/drivers/media/usb/zr364xx/zr364xx.c b/drivers/media/usb/zr364xx/zr364xx.c
index 3b80579a82c5..5c006277b8b1 100644
--- a/drivers/media/usb/zr364xx/zr364xx.c
+++ b/drivers/media/usb/zr364xx/zr364xx.c
@@ -806,7 +806,6 @@ static int zr364xx_vidioc_try_fmt_vid_cap(struct file *file, void *priv,
 	f->fmt.pix.bytesperline = f->fmt.pix.width * 2;
 	f->fmt.pix.sizeimage = f->fmt.pix.height * f->fmt.pix.bytesperline;
 	f->fmt.pix.colorspace = V4L2_COLORSPACE_JPEG;
-	f->fmt.pix.priv = 0;
 	DBG("%s: V4L2_PIX_FMT_%s (%d) ok!\n", __func__,
 	    decode_fourcc(f->fmt.pix.pixelformat, pixelformat_name),
 	    f->fmt.pix.field);
@@ -829,7 +828,6 @@ static int zr364xx_vidioc_g_fmt_vid_cap(struct file *file, void *priv,
 	f->fmt.pix.bytesperline = f->fmt.pix.width * 2;
 	f->fmt.pix.sizeimage = f->fmt.pix.height * f->fmt.pix.bytesperline;
 	f->fmt.pix.colorspace = V4L2_COLORSPACE_JPEG;
-	f->fmt.pix.priv = 0;
 	return 0;
 }
 
@@ -866,7 +864,6 @@ static int zr364xx_vidioc_s_fmt_vid_cap(struct file *file, void *priv,
 	f->fmt.pix.bytesperline = f->fmt.pix.width * 2;
 	f->fmt.pix.sizeimage = f->fmt.pix.height * f->fmt.pix.bytesperline;
 	f->fmt.pix.colorspace = V4L2_COLORSPACE_JPEG;
-	f->fmt.pix.priv = 0;
 	cam->vb_vidq.field = f->fmt.pix.field;
 
 	if (f->fmt.pix.width == 160 && f->fmt.pix.height == 120)
diff --git a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c
index 7e2411c36419..cca6c2f76b3a 100644
--- a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c
+++ b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c
@@ -540,7 +540,16 @@ struct v4l2_framebuffer32 {
 	__u32			capability;
 	__u32			flags;
 	compat_caddr_t 		base;
-	struct v4l2_pix_format	fmt;
+	struct {
+		__u32		width;
+		__u32		height;
+		__u32		pixelformat;
+		__u32		field;
+		__u32		bytesperline;
+		__u32		sizeimage;
+		__u32		colorspace;
+		__u32		priv;
+	} fmt;
 };
 
 static int get_v4l2_framebuffer32(struct v4l2_framebuffer *kp, struct v4l2_framebuffer32 __user *up)
@@ -550,10 +559,10 @@ static int get_v4l2_framebuffer32(struct v4l2_framebuffer *kp, struct v4l2_frame
 	if (!access_ok(VERIFY_READ, up, sizeof(struct v4l2_framebuffer32)) ||
 		get_user(tmp, &up->base) ||
 		get_user(kp->capability, &up->capability) ||
-		get_user(kp->flags, &up->flags))
+		get_user(kp->flags, &up->flags) ||
+		copy_from_user(&kp->fmt, &up->fmt, sizeof(up->fmt)))
 			return -EFAULT;
 	kp->base = compat_ptr(tmp);
-	get_v4l2_pix_format(&kp->fmt, &up->fmt);
 	return 0;
 }
 
@@ -564,9 +573,9 @@ static int put_v4l2_framebuffer32(struct v4l2_framebuffer *kp, struct v4l2_frame
 	if (!access_ok(VERIFY_WRITE, up, sizeof(struct v4l2_framebuffer32)) ||
 		put_user(tmp, &up->base) ||
 		put_user(kp->capability, &up->capability) ||
-		put_user(kp->flags, &up->flags))
+		put_user(kp->flags, &up->flags) ||
+		copy_to_user(&up->fmt, &kp->fmt, sizeof(up->fmt)))
 			return -EFAULT;
-	put_v4l2_pix_format(&kp->fmt, &up->fmt);
 	return 0;
 }
 
diff --git a/drivers/media/v4l2-core/v4l2-ioctl.c b/drivers/media/v4l2-core/v4l2-ioctl.c
index 96bc117f66b2..2e630005676f 100644
--- a/drivers/media/v4l2-core/v4l2-ioctl.c
+++ b/drivers/media/v4l2-core/v4l2-ioctl.c
@@ -973,13 +973,48 @@ static int check_fmt(struct file *file, enum v4l2_buf_type type)
 	return -EINVAL;
 }
 
+static void v4l_sanitize_format(struct v4l2_format *fmt)
+{
+	unsigned int offset;
+
+	/*
+	 * The v4l2_pix_format structure has been extended with fields that were
+	 * not previously required to be set to zero by applications. The priv
+	 * field, when set to a magic value, indicates the the extended fields
+	 * are valid. Otherwise they will contain undefined values. To simplify
+	 * the API towards drivers zero the extended fields and set the priv
+	 * field to the magic value when the extended pixel format structure
+	 * isn't used by applications.
+	 */
+
+	if (fmt->type != V4L2_BUF_TYPE_VIDEO_CAPTURE &&
+	    fmt->type != V4L2_BUF_TYPE_VIDEO_OUTPUT)
+		return;
+
+	if (fmt->fmt.pix.priv == V4L2_PIX_FMT_PRIV_MAGIC)
+		return;
+
+	fmt->fmt.pix.priv = V4L2_PIX_FMT_PRIV_MAGIC;
+
+	offset = offsetof(struct v4l2_pix_format, priv)
+	       + sizeof(fmt->fmt.pix.priv);
+	memset(((void *)&fmt->fmt.pix) + offset, 0,
+	       sizeof(fmt->fmt.pix) - offset);
+}
+
 static int v4l_querycap(const struct v4l2_ioctl_ops *ops,
 				struct file *file, void *fh, void *arg)
 {
 	struct v4l2_capability *cap = (struct v4l2_capability *)arg;
+	int ret;
 
 	cap->version = LINUX_VERSION_CODE;
-	return ops->vidioc_querycap(file, fh, cap);
+
+	ret = ops->vidioc_querycap(file, fh, cap);
+
+	cap->capabilities |= V4L2_CAP_EXT_PIX_FORMAT;
+
+	return ret;
 }
 
 static int v4l_s_input(const struct v4l2_ioctl_ops *ops,
@@ -1103,12 +1138,17 @@ static int v4l_g_fmt(const struct v4l2_ioctl_ops *ops,
 	bool is_sdr = vfd->vfl_type == VFL_TYPE_SDR;
 	bool is_rx = vfd->vfl_dir != VFL_DIR_TX;
 	bool is_tx = vfd->vfl_dir != VFL_DIR_RX;
+	int ret;
+
+	p->fmt.pix.priv = V4L2_PIX_FMT_PRIV_MAGIC;
 
 	switch (p->type) {
 	case V4L2_BUF_TYPE_VIDEO_CAPTURE:
 		if (unlikely(!is_rx || !is_vid || !ops->vidioc_g_fmt_vid_cap))
 			break;
-		return ops->vidioc_g_fmt_vid_cap(file, fh, arg);
+		ret = ops->vidioc_g_fmt_vid_cap(file, fh, arg);
+		p->fmt.pix.priv = V4L2_PIX_FMT_PRIV_MAGIC;
+		return ret;
 	case V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE:
 		if (unlikely(!is_rx || !is_vid || !ops->vidioc_g_fmt_vid_cap_mplane))
 			break;
@@ -1128,7 +1168,9 @@ static int v4l_g_fmt(const struct v4l2_ioctl_ops *ops,
 	case V4L2_BUF_TYPE_VIDEO_OUTPUT:
 		if (unlikely(!is_tx || !is_vid || !ops->vidioc_g_fmt_vid_out))
 			break;
-		return ops->vidioc_g_fmt_vid_out(file, fh, arg);
+		ret = ops->vidioc_g_fmt_vid_out(file, fh, arg);
+		p->fmt.pix.priv = V4L2_PIX_FMT_PRIV_MAGIC;
+		return ret;
 	case V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE:
 		if (unlikely(!is_tx || !is_vid || !ops->vidioc_g_fmt_vid_out_mplane))
 			break;
@@ -1163,6 +1205,8 @@ static int v4l_s_fmt(const struct v4l2_ioctl_ops *ops,
 	bool is_rx = vfd->vfl_dir != VFL_DIR_TX;
 	bool is_tx = vfd->vfl_dir != VFL_DIR_RX;
 
+	v4l_sanitize_format(p);
+
 	switch (p->type) {
 	case V4L2_BUF_TYPE_VIDEO_CAPTURE:
 		if (unlikely(!is_rx || !is_vid || !ops->vidioc_s_fmt_vid_cap))
@@ -1233,6 +1277,8 @@ static int v4l_try_fmt(const struct v4l2_ioctl_ops *ops,
 	bool is_rx = vfd->vfl_dir != VFL_DIR_TX;
 	bool is_tx = vfd->vfl_dir != VFL_DIR_RX;
 
+	v4l_sanitize_format(p);
+
 	switch (p->type) {
 	case V4L2_BUF_TYPE_VIDEO_CAPTURE:
 		if (unlikely(!is_rx || !is_vid || !ops->vidioc_try_fmt_vid_cap))
@@ -1516,7 +1562,18 @@ static int v4l_create_bufs(const struct v4l2_ioctl_ops *ops,
 	struct v4l2_create_buffers *create = arg;
 	int ret = check_fmt(file, create->format.type);
 
-	return ret ? ret : ops->vidioc_create_bufs(file, fh, create);
+	if (ret)
+		return ret;
+
+	v4l_sanitize_format(&create->format);
+
+	ret = ops->vidioc_create_bufs(file, fh, create);
+
+	if (create->format.type == V4L2_BUF_TYPE_VIDEO_CAPTURE ||
+	    create->format.type == V4L2_BUF_TYPE_VIDEO_OUTPUT)
+		create->format.fmt.pix.priv = V4L2_PIX_FMT_PRIV_MAGIC;
+
+	return ret;
 }
 
 static int v4l_prepare_buf(const struct v4l2_ioctl_ops *ops,
diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h
index a498d8b58679..eb3bdd33816b 100644
--- a/include/uapi/linux/videodev2.h
+++ b/include/uapi/linux/videodev2.h
@@ -268,6 +268,7 @@ struct v4l2_capability {
 #define V4L2_CAP_MODULATOR		0x00080000  /* has a modulator */
 
 #define V4L2_CAP_SDR_CAPTURE		0x00100000  /* Is a SDR capture device */
+#define V4L2_CAP_EXT_PIX_FORMAT		0x00200000  /* Supports the extended pixel format */
 
 #define V4L2_CAP_READWRITE              0x01000000  /* read/write systemcalls */
 #define V4L2_CAP_ASYNCIO                0x02000000  /* async I/O */
@@ -448,6 +449,9 @@ struct v4l2_pix_format {
 #define V4L2_SDR_FMT_CU8          v4l2_fourcc('C', 'U', '0', '8') /* IQ u8 */
 #define V4L2_SDR_FMT_CU16LE       v4l2_fourcc('C', 'U', '1', '6') /* IQ u16le */
 
+/* priv field value to indicates that subsequent fields are valid. */
+#define V4L2_PIX_FMT_PRIV_MAGIC		0xfeedcafe
+
 /*
  *	F O R M A T   E N U M E R A T I O N
  */
@@ -752,7 +756,16 @@ struct v4l2_framebuffer {
 /* FIXME: in theory we should pass something like PCI device + memory
  * region + offset instead of some physical address */
 	void                    *base;
-	struct v4l2_pix_format	fmt;
+	struct {
+		__u32		width;
+		__u32		height;
+		__u32		pixelformat;
+		__u32		field;		/* enum v4l2_field */
+		__u32		bytesperline;	/* for padding, zero if unused */
+		__u32		sizeimage;
+		__u32		colorspace;	/* enum v4l2_colorspace */
+		__u32		priv;		/* reserved field, set to 0 */
+	} fmt;
 };
 /*  Flags for the 'capability' field. Read only */
 #define V4L2_FBUF_CAP_EXTERNOVERLAY	0x0001
-- 
cgit v1.2.3


From c96fd46afb34a554406bce9784126b96ad09091e Mon Sep 17 00:00:00 2001
From: Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
Date: Tue, 27 May 2014 10:12:43 -0300
Subject: [media] v4l: Add premultiplied alpha flag for pixel formats

When set, the new V4L2_PIX_FMT_FLAG_PREMUL_ALPHA flag indicates that the
pixel values are premultiplied by the alpha channel value.

Signed-off-by: Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
Signed-off-by: Mauro Carvalho Chehab <m.chehab@samsung.com>
---
 Documentation/DocBook/media/v4l/pixfmt.xml | 31 +++++++++++++++++++++++++++++-
 Documentation/DocBook/media/v4l/v4l2.xml   |  2 +-
 drivers/media/v4l2-core/v4l2-ioctl.c       |  5 +++--
 include/uapi/linux/videodev2.h             |  8 +++++++-
 4 files changed, 41 insertions(+), 5 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/Documentation/DocBook/media/v4l/pixfmt.xml b/Documentation/DocBook/media/v4l/pixfmt.xml
index bb36b3829cf9..87ce7f3e7178 100644
--- a/Documentation/DocBook/media/v4l/pixfmt.xml
+++ b/Documentation/DocBook/media/v4l/pixfmt.xml
@@ -135,6 +135,12 @@ extended fields were set to zero. On return drivers must set the
 <constant>V4L2_PIX_FMT_PRIV_MAGIC</constant> and all the extended fields to
 applicable values.</para></entry>
 	</row>
+	<row>
+	  <entry>__u32</entry>
+	  <entry><structfield>flags</structfield></entry>
+	    <entry>Flags set by the application or driver, see <xref
+linkend="format-flags" />.</entry>
+	</row>
       </tbody>
     </tgroup>
   </table>
@@ -220,9 +226,15 @@ codes can be used.</entry>
           and the number of valid entries in the
           <structfield>plane_fmt</structfield> array.</entry>
         </row>
+	<row>
+	  <entry>__u8</entry>
+	  <entry><structfield>flags</structfield></entry>
+	  <entry>Flags set by the application or driver, see <xref
+linkend="format-flags" />.</entry>
+	</row>
         <row>
           <entry>__u8</entry>
-          <entry><structfield>reserved[11]</structfield></entry>
+          <entry><structfield>reserved[10]</structfield></entry>
           <entry>Reserved for future extensions. Should be zeroed by the
            application.</entry>
         </row>
@@ -1079,4 +1091,21 @@ concatenated to form the JPEG stream. </para>
 	</tbody>
       </tgroup>
     </table>
+
+    <table frame="none" pgwide="1" id="format-flags">
+      <title>Format Flags</title>
+      <tgroup cols="3">
+	&cs-def;
+	<tbody valign="top">
+	  <row>
+	    <entry><constant>V4L2_PIX_FMT_FLAG_PREMUL_ALPHA</constant></entry>
+	    <entry>0x00000001</entry>
+	    <entry>The color values are premultiplied by the alpha channel
+value. For example, if a light blue pixel with 50% transparency was described by
+RGBA values (128, 192, 255, 128), the same pixel described with premultiplied
+colors would be described by RGBA values (64, 96, 128, 128) </entry>
+	  </row>
+	</tbody>
+      </tgroup>
+    </table>
   </section>
diff --git a/Documentation/DocBook/media/v4l/v4l2.xml b/Documentation/DocBook/media/v4l/v4l2.xml
index d0a48bebfa52..f2f81f06a17b 100644
--- a/Documentation/DocBook/media/v4l/v4l2.xml
+++ b/Documentation/DocBook/media/v4l/v4l2.xml
@@ -155,7 +155,7 @@ applications. -->
 	<revnumber>3.16</revnumber>
 	<date>2014-05-27</date>
 	<authorinitials>lp</authorinitials>
-	<revremark>Extended &v4l2-pix-format;.
+	<revremark>Extended &v4l2-pix-format;. Added format flags.
 	</revremark>
       </revision>
 
diff --git a/drivers/media/v4l2-core/v4l2-ioctl.c b/drivers/media/v4l2-core/v4l2-ioctl.c
index 2e630005676f..e0bafda89e13 100644
--- a/drivers/media/v4l2-core/v4l2-ioctl.c
+++ b/drivers/media/v4l2-core/v4l2-ioctl.c
@@ -256,7 +256,8 @@ static void v4l_print_format(const void *arg, bool write_only)
 		pix = &p->fmt.pix;
 		pr_cont(", width=%u, height=%u, "
 			"pixelformat=%c%c%c%c, field=%s, "
-			"bytesperline=%u, sizeimage=%u, colorspace=%d\n",
+			"bytesperline=%u, sizeimage=%u, colorspace=%d, "
+			"flags %u\n",
 			pix->width, pix->height,
 			(pix->pixelformat & 0xff),
 			(pix->pixelformat >>  8) & 0xff,
@@ -264,7 +265,7 @@ static void v4l_print_format(const void *arg, bool write_only)
 			(pix->pixelformat >> 24) & 0xff,
 			prt_names(pix->field, v4l2_field_names),
 			pix->bytesperline, pix->sizeimage,
-			pix->colorspace);
+			pix->colorspace, pix->flags);
 		break;
 	case V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE:
 	case V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE:
diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h
index eb3bdd33816b..b73e8cda7192 100644
--- a/include/uapi/linux/videodev2.h
+++ b/include/uapi/linux/videodev2.h
@@ -288,6 +288,7 @@ struct v4l2_pix_format {
 	__u32          		sizeimage;
 	__u32			colorspace;	/* enum v4l2_colorspace */
 	__u32			priv;		/* private data, depends on pixelformat */
+	__u32			flags;		/* format flags (V4L2_PIX_FMT_FLAG_*) */
 };
 
 /*      Pixel format         FOURCC                          depth  Description  */
@@ -452,6 +453,9 @@ struct v4l2_pix_format {
 /* priv field value to indicates that subsequent fields are valid. */
 #define V4L2_PIX_FMT_PRIV_MAGIC		0xfeedcafe
 
+/* Flags */
+#define V4L2_PIX_FMT_FLAG_PREMUL_ALPHA	0x00000001
+
 /*
  *	F O R M A T   E N U M E R A T I O N
  */
@@ -1754,6 +1758,7 @@ struct v4l2_plane_pix_format {
  * @colorspace:		enum v4l2_colorspace; supplemental to pixelformat
  * @plane_fmt:		per-plane information
  * @num_planes:		number of planes for this format
+ * @flags:		format flags (V4L2_PIX_FMT_FLAG_*)
  */
 struct v4l2_pix_format_mplane {
 	__u32				width;
@@ -1764,7 +1769,8 @@ struct v4l2_pix_format_mplane {
 
 	struct v4l2_plane_pix_format	plane_fmt[VIDEO_MAX_PLANES];
 	__u8				num_planes;
-	__u8				reserved[11];
+	__u8				flags;
+	__u8				reserved[10];
 } __attribute__ ((packed));
 
 /**
-- 
cgit v1.2.3


From e34c4db8fe5c8d47753057b2409f9b8388f1896e Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hans.verkuil@cisco.com>
Date: Wed, 26 Feb 2014 18:22:05 -0300
Subject: [media] videodev2.h: add V4L2_FIELD_HAS_T_OR_B macro

Add a macro to test if the field consists of a single top
or bottom field. Anyone who needs to work with fields as opposed to
frame will need this.

Signed-off-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <m.chehab@samsung.com>
---
 include/uapi/linux/videodev2.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h
index b73e8cda7192..1f1a65c1fe8e 100644
--- a/include/uapi/linux/videodev2.h
+++ b/include/uapi/linux/videodev2.h
@@ -124,6 +124,10 @@ enum v4l2_field {
 	 (field) == V4L2_FIELD_INTERLACED_BT ||\
 	 (field) == V4L2_FIELD_SEQ_TB ||\
 	 (field) == V4L2_FIELD_SEQ_BT)
+#define V4L2_FIELD_HAS_T_OR_B(field)	\
+	((field) == V4L2_FIELD_BOTTOM ||\
+	 (field) == V4L2_FIELD_TOP ||\
+	 (field) == V4L2_FIELD_ALTERNATE)
 
 enum v4l2_buf_type {
 	V4L2_BUF_TYPE_VIDEO_CAPTURE        = 1,
-- 
cgit v1.2.3


From aef9a7bd9b676f797dd5cefd43deb30d36b976a9 Mon Sep 17 00:00:00 2001
From: Yoshihiro YUNOMAE <yoshihiro.yunomae.ez@hitachi.com>
Date: Wed, 16 Jul 2014 01:19:36 +0000
Subject: serial/uart/8250: Add tunable RX interrupt trigger I/F of FIFO
 buffers

Add tunable RX interrupt trigger I/F of FIFO buffers.

Serial devices are used as not only message communication devices but control
or sending communication devices. For the latter uses, normally small data
will be exchanged, so user applications want to receive data unit as soon as
possible for real-time tendency. If we have a sensor which sends a 1 byte data
each time and must control a device based on the sensor feedback, the RX
interrupt should be triggered for each data.

According to HW specification of serial UART devices, RX interrupt trigger
can be changed, but the trigger is hard-coded. For example, RX interrupt trigger
in 16550A can be set to 1, 4, 8, or 14 bytes for HW, but current driver sets
the trigger to only 8bytes.

This patch makes some devices change RX interrupt trigger from userland.

<How to use>
- Read current setting
 # cat /sys/class/tty/ttyS0/rx_trig_bytes
 8

- Write user setting
 # echo 1 > /sys/class/tty/ttyS0/rx_trig_bytes
 # cat /sys/class/tty/ttyS0/rx_trig_bytes
 1

<Support uart devices>
- 16550A and Tegra (1, 4, 8, or 14 bytes)
- 16650V2 (8, 16, 24, or 28 bytes)
- 16654 (8, 16, 56, or 60 bytes)
- 16750 (1, 16, 32, or 56 bytes)

<Change log>
Changes in V9:
 - Use attr_group instead of dev_spec_attr_group of uart_port structure

Changes in V8:
 - Divide this patch from V7's patch based on Greg's comment

Changes in V7:
 - Add Documentation
 - Change I/F name from rx_int_trig to rx_trig_bytes because the name
   rx_int_trig is hard to understand how users specify the value

Changes in V6:
 - Move FCR_RX_TRIG_* definition in 8250.h to include/uapi/linux/serial_reg.h,
   rename those to UART_FCR_R_TRIG_*, and use UART_FCR_TRIGGER_MASK to
   UART_FCR_R_TRIG_BITS()
 - Change following function names:
    convert_fcr2val() => fcr_get_rxtrig_bytes()
    convert_val2rxtrig() => bytes_to_fcr_rxtrig()
 - Fix typo in serial8250_do_set_termios()
 - Delete the verbose error message pr_info() in bytes_to_fcr_rxtrig()
 - Rename *rx_int_trig/rx_trig* to *rxtrig* for several functions or variables
   (but UI remains rx_int_trig)
 - Change the meaningless variable name 'val' to 'bytes' following functions:
    fcr_get_rxtrig_bytes(), bytes_to_fcr_rxtrig(), do_set_rxtrig(),
    do_serial8250_set_rxtrig(), and serial8250_set_attr_rxtrig()
 - Use up->fcr in order to get rxtrig_bytes instead of rx_trig_raw in
   fcr_get_rxtrig_bytes()
 - Use conf_type->rxtrig_bytes[0] instead of switch statement for support check
   in register_dev_spec_attr_grp()
 - Delete the checking whether a user changed FCR or not when minimum buffer
   is needed in serial8250_do_set_termios()

Changes in V5.1:
 - Fix FCR_RX_TRIG_MAX_STATE definition

Changes in V5:
 - Support Tegra, 16650V2, 16654, and 16750
 - Store default FCR value to up->fcr when the port is first created
 - Add rx_trig_byte[] in uart_config[] for each device and use rx_trig_byte[]
   in convert_fcr2val() and convert_val2rxtrig()

Changes in V4:
 - Introduce fifo_bug flag in uart_8250_port structure
   This is enabled only when parity is enabled and UART_BUG_PARITY is enabled
   for up->bugs. If this flag is enabled, user cannot set RX trigger.
 - Return -EOPNOTSUPP when it does not support device at convert_fcr2val() and
   at convert_val2rxtrig()
 - Set the nearest lower RX trigger when users input a meaningless value at
   convert_val2rxtrig()
 - Check whether p->fcr is existing at serial8250_clear_and_reinit_fifos()
 - Set fcr = up->fcr in the begging of serial8250_do_set_termios()

Changes in V3:
 - Change I/F from ioctl(2) to sysfs(rx_int_trig)

Changed in V2:
 - Use _IOW for TIOCSFIFORTRIG definition
 - Pass the interrupt trigger value itself

Signed-off-by: Yoshihiro YUNOMAE <yoshihiro.yunomae.ez@hitachi.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/ABI/testing/sysfs-tty |  16 ++++
 drivers/tty/serial/8250/8250.h      |   2 +
 drivers/tty/serial/8250/8250_core.c | 173 ++++++++++++++++++++++++++++++++----
 include/linux/serial_8250.h         |   2 +
 include/uapi/linux/serial_reg.h     |   5 ++
 5 files changed, 183 insertions(+), 15 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/Documentation/ABI/testing/sysfs-tty b/Documentation/ABI/testing/sysfs-tty
index ad22fb0ee765..9eb3c2b6b040 100644
--- a/Documentation/ABI/testing/sysfs-tty
+++ b/Documentation/ABI/testing/sysfs-tty
@@ -138,3 +138,19 @@ Description:
 
 		 These sysfs values expose the TIOCGSERIAL interface via
 		 sysfs rather than via ioctls.
+
+What:		/sys/class/tty/ttyS0/rx_trig_bytes
+Date:		May 2014
+Contact:	Yoshihiro YUNOMAE <yoshihiro.yunomae.ez@hitachi.com>
+Description:
+		 Shows current RX interrupt trigger bytes or sets the
+		 user specified value to change it for the FIFO buffer.
+		 Users can show or set this value regardless of opening the
+		 serial device file or not.
+
+		 The RX trigger can be set one of four kinds of values for UART
+		 serials. When users input a meaning less value to this I/F,
+		 the RX trigger is changed to the nearest lower value for the
+		 device specification. For example, when user sets 7bytes on
+		 16550A, which has 1/4/8/14 bytes trigger, the RX trigger is
+		 automatically changed to 4 bytes.
diff --git a/drivers/tty/serial/8250/8250.h b/drivers/tty/serial/8250/8250.h
index 1ebf8538b4fa..1b08c918cd51 100644
--- a/drivers/tty/serial/8250/8250.h
+++ b/drivers/tty/serial/8250/8250.h
@@ -12,6 +12,7 @@
  */
 
 #include <linux/serial_8250.h>
+#include <linux/serial_reg.h>
 #include <linux/dmaengine.h>
 
 struct uart_8250_dma {
@@ -60,6 +61,7 @@ struct serial8250_config {
 	unsigned short	fifo_size;
 	unsigned short	tx_loadsz;
 	unsigned char	fcr;
+	unsigned char	rxtrig_bytes[UART_FCR_R_TRIG_MAX_STATE];
 	unsigned int	flags;
 };
 
diff --git a/drivers/tty/serial/8250/8250_core.c b/drivers/tty/serial/8250/8250_core.c
index 0da01458816e..1d42dba6121d 100644
--- a/drivers/tty/serial/8250/8250_core.c
+++ b/drivers/tty/serial/8250/8250_core.c
@@ -31,7 +31,6 @@
 #include <linux/tty.h>
 #include <linux/ratelimit.h>
 #include <linux/tty_flip.h>
-#include <linux/serial_reg.h>
 #include <linux/serial_core.h>
 #include <linux/serial.h>
 #include <linux/serial_8250.h>
@@ -161,6 +160,7 @@ static const struct serial8250_config uart_config[] = {
 		.fifo_size	= 16,
 		.tx_loadsz	= 16,
 		.fcr		= UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10,
+		.rxtrig_bytes	= {1, 4, 8, 14},
 		.flags		= UART_CAP_FIFO,
 	},
 	[PORT_CIRRUS] = {
@@ -180,6 +180,7 @@ static const struct serial8250_config uart_config[] = {
 		.tx_loadsz	= 16,
 		.fcr		= UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_01 |
 				  UART_FCR_T_TRIG_00,
+		.rxtrig_bytes	= {8, 16, 24, 28},
 		.flags		= UART_CAP_FIFO | UART_CAP_EFR | UART_CAP_SLEEP,
 	},
 	[PORT_16750] = {
@@ -188,6 +189,7 @@ static const struct serial8250_config uart_config[] = {
 		.tx_loadsz	= 64,
 		.fcr		= UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10 |
 				  UART_FCR7_64BYTE,
+		.rxtrig_bytes	= {1, 16, 32, 56},
 		.flags		= UART_CAP_FIFO | UART_CAP_SLEEP | UART_CAP_AFE,
 	},
 	[PORT_STARTECH] = {
@@ -209,6 +211,7 @@ static const struct serial8250_config uart_config[] = {
 		.tx_loadsz	= 32,
 		.fcr		= UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_01 |
 				  UART_FCR_T_TRIG_10,
+		.rxtrig_bytes	= {8, 16, 56, 60},
 		.flags		= UART_CAP_FIFO | UART_CAP_EFR | UART_CAP_SLEEP,
 	},
 	[PORT_16850] = {
@@ -266,6 +269,7 @@ static const struct serial8250_config uart_config[] = {
 		.tx_loadsz	= 8,
 		.fcr		= UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_01 |
 				  UART_FCR_T_TRIG_01,
+		.rxtrig_bytes	= {1, 4, 8, 14},
 		.flags		= UART_CAP_FIFO | UART_CAP_RTOIE,
 	},
 	[PORT_XR17D15X] = {
@@ -530,11 +534,8 @@ static void serial8250_clear_fifos(struct uart_8250_port *p)
 
 void serial8250_clear_and_reinit_fifos(struct uart_8250_port *p)
 {
-	unsigned char fcr;
-
 	serial8250_clear_fifos(p);
-	fcr = uart_config[p->port.type].fcr;
-	serial_out(p, UART_FCR, fcr);
+	serial_out(p, UART_FCR, p->fcr);
 }
 EXPORT_SYMBOL_GPL(serial8250_clear_and_reinit_fifos);
 
@@ -2256,10 +2257,9 @@ serial8250_do_set_termios(struct uart_port *port, struct ktermios *termios,
 		          struct ktermios *old)
 {
 	struct uart_8250_port *up = up_to_u8250p(port);
-	unsigned char cval, fcr = 0;
+	unsigned char cval;
 	unsigned long flags;
 	unsigned int baud, quot;
-	int fifo_bug = 0;
 
 	switch (termios->c_cflag & CSIZE) {
 	case CS5:
@@ -2282,7 +2282,7 @@ serial8250_do_set_termios(struct uart_port *port, struct ktermios *termios,
 	if (termios->c_cflag & PARENB) {
 		cval |= UART_LCR_PARITY;
 		if (up->bugs & UART_BUG_PARITY)
-			fifo_bug = 1;
+			up->fifo_bug = true;
 	}
 	if (!(termios->c_cflag & PARODD))
 		cval |= UART_LCR_EPAR;
@@ -2306,10 +2306,10 @@ serial8250_do_set_termios(struct uart_port *port, struct ktermios *termios,
 		quot++;
 
 	if (up->capabilities & UART_CAP_FIFO && port->fifosize > 1) {
-		fcr = uart_config[port->type].fcr;
-		if ((baud < 2400 && !up->dma) || fifo_bug) {
-			fcr &= ~UART_FCR_TRIGGER_MASK;
-			fcr |= UART_FCR_TRIGGER_1;
+		/* NOTE: If fifo_bug is not set, a user can set RX_trigger. */
+		if ((baud < 2400 && !up->dma) || up->fifo_bug) {
+			up->fcr &= ~UART_FCR_TRIGGER_MASK;
+			up->fcr |= UART_FCR_TRIGGER_1;
 		}
 	}
 
@@ -2442,15 +2442,15 @@ serial8250_do_set_termios(struct uart_port *port, struct ktermios *termios,
 	 * is written without DLAB set, this mode will be disabled.
 	 */
 	if (port->type == PORT_16750)
-		serial_port_out(port, UART_FCR, fcr);
+		serial_port_out(port, UART_FCR, up->fcr);
 
 	serial_port_out(port, UART_LCR, cval);		/* reset DLAB */
 	up->lcr = cval;					/* Save LCR */
 	if (port->type != PORT_16750) {
 		/* emulated UARTs (Lucent Venus 167x) need two steps */
-		if (fcr & UART_FCR_ENABLE_FIFO)
+		if (up->fcr & UART_FCR_ENABLE_FIFO)
 			serial_port_out(port, UART_FCR, UART_FCR_ENABLE_FIFO);
-		serial_port_out(port, UART_FCR, fcr);		/* set fcr */
+		serial_port_out(port, UART_FCR, up->fcr);	/* set fcr */
 	}
 	serial8250_set_mctrl(port, port->mctrl);
 	spin_unlock_irqrestore(&port->lock, flags);
@@ -2640,6 +2640,146 @@ static int serial8250_request_port(struct uart_port *port)
 	return ret;
 }
 
+static int fcr_get_rxtrig_bytes(struct uart_8250_port *up)
+{
+	const struct serial8250_config *conf_type = &uart_config[up->port.type];
+	unsigned char bytes;
+
+	bytes = conf_type->rxtrig_bytes[UART_FCR_R_TRIG_BITS(up->fcr)];
+
+	return bytes ? bytes : -EOPNOTSUPP;
+}
+
+static int bytes_to_fcr_rxtrig(struct uart_8250_port *up, unsigned char bytes)
+{
+	const struct serial8250_config *conf_type = &uart_config[up->port.type];
+	int i;
+
+	if (!conf_type->rxtrig_bytes[UART_FCR_R_TRIG_BITS(UART_FCR_R_TRIG_00)])
+		return -EOPNOTSUPP;
+
+	for (i = 1; i < UART_FCR_R_TRIG_MAX_STATE; i++) {
+		if (bytes < conf_type->rxtrig_bytes[i])
+			/* Use the nearest lower value */
+			return (--i) << UART_FCR_R_TRIG_SHIFT;
+	}
+
+	return UART_FCR_R_TRIG_11;
+}
+
+static int do_get_rxtrig(struct tty_port *port)
+{
+	struct uart_state *state = container_of(port, struct uart_state, port);
+	struct uart_port *uport = state->uart_port;
+	struct uart_8250_port *up =
+		container_of(uport, struct uart_8250_port, port);
+
+	if (!(up->capabilities & UART_CAP_FIFO) || uport->fifosize <= 1)
+		return -EINVAL;
+
+	return fcr_get_rxtrig_bytes(up);
+}
+
+static int do_serial8250_get_rxtrig(struct tty_port *port)
+{
+	int rxtrig_bytes;
+
+	mutex_lock(&port->mutex);
+	rxtrig_bytes = do_get_rxtrig(port);
+	mutex_unlock(&port->mutex);
+
+	return rxtrig_bytes;
+}
+
+static ssize_t serial8250_get_attr_rx_trig_bytes(struct device *dev,
+	struct device_attribute *attr, char *buf)
+{
+	struct tty_port *port = dev_get_drvdata(dev);
+	int rxtrig_bytes;
+
+	rxtrig_bytes = do_serial8250_get_rxtrig(port);
+	if (rxtrig_bytes < 0)
+		return rxtrig_bytes;
+
+	return snprintf(buf, PAGE_SIZE, "%d\n", rxtrig_bytes);
+}
+
+static int do_set_rxtrig(struct tty_port *port, unsigned char bytes)
+{
+	struct uart_state *state = container_of(port, struct uart_state, port);
+	struct uart_port *uport = state->uart_port;
+	struct uart_8250_port *up =
+		container_of(uport, struct uart_8250_port, port);
+	int rxtrig;
+
+	if (!(up->capabilities & UART_CAP_FIFO) || uport->fifosize <= 1 ||
+	    up->fifo_bug)
+		return -EINVAL;
+
+	rxtrig = bytes_to_fcr_rxtrig(up, bytes);
+	if (rxtrig < 0)
+		return rxtrig;
+
+	serial8250_clear_fifos(up);
+	up->fcr &= ~UART_FCR_TRIGGER_MASK;
+	up->fcr |= (unsigned char)rxtrig;
+	serial_out(up, UART_FCR, up->fcr);
+	return 0;
+}
+
+static int do_serial8250_set_rxtrig(struct tty_port *port, unsigned char bytes)
+{
+	int ret;
+
+	mutex_lock(&port->mutex);
+	ret = do_set_rxtrig(port, bytes);
+	mutex_unlock(&port->mutex);
+
+	return ret;
+}
+
+static ssize_t serial8250_set_attr_rx_trig_bytes(struct device *dev,
+	struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct tty_port *port = dev_get_drvdata(dev);
+	unsigned char bytes;
+	int ret;
+
+	if (!count)
+		return -EINVAL;
+
+	ret = kstrtou8(buf, 10, &bytes);
+	if (ret < 0)
+		return ret;
+
+	ret = do_serial8250_set_rxtrig(port, bytes);
+	if (ret < 0)
+		return ret;
+
+	return count;
+}
+
+static DEVICE_ATTR(rx_trig_bytes, S_IRUSR | S_IWUSR | S_IRGRP,
+		   serial8250_get_attr_rx_trig_bytes,
+		   serial8250_set_attr_rx_trig_bytes);
+
+static struct attribute *serial8250_dev_attrs[] = {
+	&dev_attr_rx_trig_bytes.attr,
+	NULL,
+	};
+
+static struct attribute_group serial8250_dev_attr_group = {
+	.attrs = serial8250_dev_attrs,
+	};
+
+static void register_dev_spec_attr_grp(struct uart_8250_port *up)
+{
+	const struct serial8250_config *conf_type = &uart_config[up->port.type];
+
+	if (conf_type->rxtrig_bytes[0])
+		up->port.attr_group = &serial8250_dev_attr_group;
+}
+
 static void serial8250_config_port(struct uart_port *port, int flags)
 {
 	struct uart_8250_port *up = up_to_u8250p(port);
@@ -2687,6 +2827,9 @@ static void serial8250_config_port(struct uart_port *port, int flags)
 	if ((port->type == PORT_XR17V35X) ||
 	   (port->type == PORT_XR17D15X))
 		port->handle_irq = exar_handle_irq;
+
+	register_dev_spec_attr_grp(up);
+	up->fcr = uart_config[up->port.type].fcr;
 }
 
 static int
diff --git a/include/linux/serial_8250.h b/include/linux/serial_8250.h
index 730ab4b3d686..f93649e22c43 100644
--- a/include/linux/serial_8250.h
+++ b/include/linux/serial_8250.h
@@ -74,8 +74,10 @@ struct uart_8250_port {
 	struct list_head	list;		/* ports on this IRQ */
 	unsigned short		capabilities;	/* port capabilities */
 	unsigned short		bugs;		/* port bugs */
+	bool			fifo_bug;	/* min RX trigger if enabled */
 	unsigned int		tx_loadsz;	/* transmit fifo load size */
 	unsigned char		acr;
+	unsigned char		fcr;
 	unsigned char		ier;
 	unsigned char		lcr;
 	unsigned char		mcr;
diff --git a/include/uapi/linux/serial_reg.h b/include/uapi/linux/serial_reg.h
index 99b47058816a..df6c9ab6b0cd 100644
--- a/include/uapi/linux/serial_reg.h
+++ b/include/uapi/linux/serial_reg.h
@@ -88,6 +88,11 @@
 #define UART_FCR6_T_TRIGGER_30	0x30 /* Mask for transmit trigger set at 30 */
 #define UART_FCR7_64BYTE	0x20 /* Go into 64 byte mode (TI16C750) */
 
+#define UART_FCR_R_TRIG_SHIFT		6
+#define UART_FCR_R_TRIG_BITS(x)		\
+	(((x) & UART_FCR_TRIGGER_MASK) >> UART_FCR_R_TRIG_SHIFT)
+#define UART_FCR_R_TRIG_MAX_STATE	4
+
 #define UART_LCR	3	/* Out: Line Control Register */
 /*
  * Note: if the word length is 5 bits (UART_LCR_WLEN5), then setting 
-- 
cgit v1.2.3


From 48dc92b9fc3926844257316e75ba11eb5c742b2c Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 25 Jun 2014 16:08:24 -0700
Subject: seccomp: add "seccomp" syscall

This adds the new "seccomp" syscall with both an "operation" and "flags"
parameter for future expansion. The third argument is a pointer value,
used with the SECCOMP_SET_MODE_FILTER operation. Currently, flags must
be 0. This is functionally equivalent to prctl(PR_SET_SECCOMP, ...).

In addition to the TSYNC flag later in this patch series, there is a
non-zero chance that this syscall could be used for configuring a fixed
argument area for seccomp-tracer-aware processes to pass syscall arguments
in the future. Hence, the use of "seccomp" not simply "seccomp_add_filter"
for this syscall. Additionally, this syscall uses operation, flags,
and user pointer for arguments because strictly passing arguments via
a user pointer would mean seccomp itself would be unable to trivially
filter the seccomp syscall itself.

Signed-off-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Reviewed-by: Andy Lutomirski <luto@amacapital.net>
---
 arch/Kconfig                      |  1 +
 arch/x86/syscalls/syscall_32.tbl  |  1 +
 arch/x86/syscalls/syscall_64.tbl  |  1 +
 include/linux/syscalls.h          |  2 ++
 include/uapi/asm-generic/unistd.h |  4 ++-
 include/uapi/linux/seccomp.h      |  4 +++
 kernel/seccomp.c                  | 55 +++++++++++++++++++++++++++++++++++----
 kernel/sys_ni.c                   |  3 +++
 8 files changed, 65 insertions(+), 6 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/arch/Kconfig b/arch/Kconfig
index 97ff872c7acc..0eae9df35b88 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -321,6 +321,7 @@ config HAVE_ARCH_SECCOMP_FILTER
 	  - secure_computing is called from a ptrace_event()-safe context
 	  - secure_computing return value is checked and a return value of -1
 	    results in the system call being skipped immediately.
+	  - seccomp syscall wired up
 
 config SECCOMP_FILTER
 	def_bool y
diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl
index d6b867921612..7527eac24122 100644
--- a/arch/x86/syscalls/syscall_32.tbl
+++ b/arch/x86/syscalls/syscall_32.tbl
@@ -360,3 +360,4 @@
 351	i386	sched_setattr		sys_sched_setattr
 352	i386	sched_getattr		sys_sched_getattr
 353	i386	renameat2		sys_renameat2
+354	i386	seccomp			sys_seccomp
diff --git a/arch/x86/syscalls/syscall_64.tbl b/arch/x86/syscalls/syscall_64.tbl
index ec255a1646d2..16272a6c12b7 100644
--- a/arch/x86/syscalls/syscall_64.tbl
+++ b/arch/x86/syscalls/syscall_64.tbl
@@ -323,6 +323,7 @@
 314	common	sched_setattr		sys_sched_setattr
 315	common	sched_getattr		sys_sched_getattr
 316	common	renameat2		sys_renameat2
+317	common	seccomp			sys_seccomp
 
 #
 # x32-specific system call numbers start at 512 to avoid cache impact
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index b0881a0ed322..1713977ee26f 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -866,4 +866,6 @@ asmlinkage long sys_process_vm_writev(pid_t pid,
 asmlinkage long sys_kcmp(pid_t pid1, pid_t pid2, int type,
 			 unsigned long idx1, unsigned long idx2);
 asmlinkage long sys_finit_module(int fd, const char __user *uargs, int flags);
+asmlinkage long sys_seccomp(unsigned int op, unsigned int flags,
+			    const char __user *uargs);
 #endif
diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
index 333640608087..65acbf0e2867 100644
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -699,9 +699,11 @@ __SYSCALL(__NR_sched_setattr, sys_sched_setattr)
 __SYSCALL(__NR_sched_getattr, sys_sched_getattr)
 #define __NR_renameat2 276
 __SYSCALL(__NR_renameat2, sys_renameat2)
+#define __NR_seccomp 277
+__SYSCALL(__NR_seccomp, sys_seccomp)
 
 #undef __NR_syscalls
-#define __NR_syscalls 277
+#define __NR_syscalls 278
 
 /*
  * All syscalls below here should go away really,
diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h
index ac2dc9f72973..b258878ba754 100644
--- a/include/uapi/linux/seccomp.h
+++ b/include/uapi/linux/seccomp.h
@@ -10,6 +10,10 @@
 #define SECCOMP_MODE_STRICT	1 /* uses hard-coded filter. */
 #define SECCOMP_MODE_FILTER	2 /* uses user-supplied filter. */
 
+/* Valid operations for seccomp syscall. */
+#define SECCOMP_SET_MODE_STRICT	0
+#define SECCOMP_SET_MODE_FILTER	1
+
 /*
  * All BPF programs must return a 32-bit value.
  * The bottom 16-bits are for optional return data.
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index 05cac2c2eca1..f0652578af75 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -18,6 +18,7 @@
 #include <linux/compat.h>
 #include <linux/sched.h>
 #include <linux/seccomp.h>
+#include <linux/syscalls.h>
 
 /* #define SECCOMP_DEBUG 1 */
 
@@ -314,7 +315,7 @@ free_prog:
  *
  * Returns 0 on success and non-zero otherwise.
  */
-static long seccomp_attach_user_filter(char __user *user_filter)
+static long seccomp_attach_user_filter(const char __user *user_filter)
 {
 	struct sock_fprog fprog;
 	long ret = -EFAULT;
@@ -517,6 +518,7 @@ out:
 #ifdef CONFIG_SECCOMP_FILTER
 /**
  * seccomp_set_mode_filter: internal function for setting seccomp filter
+ * @flags:  flags to change filter behavior
  * @filter: struct sock_fprog containing filter
  *
  * This function may be called repeatedly to install additional filters.
@@ -527,11 +529,16 @@ out:
  *
  * Returns 0 on success or -EINVAL on failure.
  */
-static long seccomp_set_mode_filter(char __user *filter)
+static long seccomp_set_mode_filter(unsigned int flags,
+				    const char __user *filter)
 {
 	const unsigned long seccomp_mode = SECCOMP_MODE_FILTER;
 	long ret = -EINVAL;
 
+	/* Validate flags. */
+	if (flags != 0)
+		goto out;
+
 	if (!seccomp_may_assign_mode(seccomp_mode))
 		goto out;
 
@@ -544,12 +551,35 @@ out:
 	return ret;
 }
 #else
-static inline long seccomp_set_mode_filter(char __user *filter)
+static inline long seccomp_set_mode_filter(unsigned int flags,
+					   const char __user *filter)
 {
 	return -EINVAL;
 }
 #endif
 
+/* Common entry point for both prctl and syscall. */
+static long do_seccomp(unsigned int op, unsigned int flags,
+		       const char __user *uargs)
+{
+	switch (op) {
+	case SECCOMP_SET_MODE_STRICT:
+		if (flags != 0 || uargs != NULL)
+			return -EINVAL;
+		return seccomp_set_mode_strict();
+	case SECCOMP_SET_MODE_FILTER:
+		return seccomp_set_mode_filter(flags, uargs);
+	default:
+		return -EINVAL;
+	}
+}
+
+SYSCALL_DEFINE3(seccomp, unsigned int, op, unsigned int, flags,
+			 const char __user *, uargs)
+{
+	return do_seccomp(op, flags, uargs);
+}
+
 /**
  * prctl_set_seccomp: configures current->seccomp.mode
  * @seccomp_mode: requested mode to use
@@ -559,12 +589,27 @@ static inline long seccomp_set_mode_filter(char __user *filter)
  */
 long prctl_set_seccomp(unsigned long seccomp_mode, char __user *filter)
 {
+	unsigned int op;
+	char __user *uargs;
+
 	switch (seccomp_mode) {
 	case SECCOMP_MODE_STRICT:
-		return seccomp_set_mode_strict();
+		op = SECCOMP_SET_MODE_STRICT;
+		/*
+		 * Setting strict mode through prctl always ignored filter,
+		 * so make sure it is always NULL here to pass the internal
+		 * check in do_seccomp().
+		 */
+		uargs = NULL;
+		break;
 	case SECCOMP_MODE_FILTER:
-		return seccomp_set_mode_filter(filter);
+		op = SECCOMP_SET_MODE_FILTER;
+		uargs = filter;
+		break;
 	default:
 		return -EINVAL;
 	}
+
+	/* prctl interface doesn't have flags, so they are always zero. */
+	return do_seccomp(op, 0, uargs);
 }
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index 36441b51b5df..2904a2105914 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -213,3 +213,6 @@ cond_syscall(compat_sys_open_by_handle_at);
 
 /* compare kernel pointers */
 cond_syscall(sys_kcmp);
+
+/* operate on Secure Computing state */
+cond_syscall(sys_seccomp);
-- 
cgit v1.2.3


From c2e1f2e30daa551db3c670c0ccfeab20a540b9e1 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Thu, 5 Jun 2014 00:23:17 -0700
Subject: seccomp: implement SECCOMP_FILTER_FLAG_TSYNC

Applying restrictive seccomp filter programs to large or diverse
codebases often requires handling threads which may be started early in
the process lifetime (e.g., by code that is linked in). While it is
possible to apply permissive programs prior to process start up, it is
difficult to further restrict the kernel ABI to those threads after that
point.

This change adds a new seccomp syscall flag to SECCOMP_SET_MODE_FILTER for
synchronizing thread group seccomp filters at filter installation time.

When calling seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
filter) an attempt will be made to synchronize all threads in current's
threadgroup to its new seccomp filter program. This is possible iff all
threads are using a filter that is an ancestor to the filter current is
attempting to synchronize to. NULL filters (where the task is running as
SECCOMP_MODE_NONE) are also treated as ancestors allowing threads to be
transitioned into SECCOMP_MODE_FILTER. If prctrl(PR_SET_NO_NEW_PRIVS,
...) has been set on the calling thread, no_new_privs will be set for
all synchronized threads too. On success, 0 is returned. On failure,
the pid of one of the failing threads will be returned and no filters
will have been applied.

The race conditions against another thread are:
- requesting TSYNC (already handled by sighand lock)
- performing a clone (already handled by sighand lock)
- changing its filter (already handled by sighand lock)
- calling exec (handled by cred_guard_mutex)
The clone case is assisted by the fact that new threads will have their
seccomp state duplicated from their parent before appearing on the tasklist.

Holding cred_guard_mutex means that seccomp filters cannot be assigned
while in the middle of another thread's exec (potentially bypassing
no_new_privs or similar). The call to de_thread() may kill threads waiting
for the mutex.

Changes across threads to the filter pointer includes a barrier.

Based on patches by Will Drewry.

Suggested-by: Julien Tinnes <jln@chromium.org>
Signed-off-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Reviewed-by: Andy Lutomirski <luto@amacapital.net>
---
 fs/exec.c                    |   2 +-
 include/linux/seccomp.h      |   2 +
 include/uapi/linux/seccomp.h |   3 +
 kernel/seccomp.c             | 135 ++++++++++++++++++++++++++++++++++++++++++-
 4 files changed, 140 insertions(+), 2 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/fs/exec.c b/fs/exec.c
index 0f5c272410f6..ab1f1200ce5d 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1216,7 +1216,7 @@ EXPORT_SYMBOL(install_exec_creds);
 /*
  * determine how safe it is to execute the proposed program
  * - the caller must hold ->cred_guard_mutex to protect against
- *   PTRACE_ATTACH
+ *   PTRACE_ATTACH or seccomp thread-sync
  */
 static void check_unsafe_exec(struct linux_binprm *bprm)
 {
diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h
index 9ff98b4bfe2e..5d586a45a319 100644
--- a/include/linux/seccomp.h
+++ b/include/linux/seccomp.h
@@ -3,6 +3,8 @@
 
 #include <uapi/linux/seccomp.h>
 
+#define SECCOMP_FILTER_FLAG_MASK	(SECCOMP_FILTER_FLAG_TSYNC)
+
 #ifdef CONFIG_SECCOMP
 
 #include <linux/thread_info.h>
diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h
index b258878ba754..0f238a43ff1e 100644
--- a/include/uapi/linux/seccomp.h
+++ b/include/uapi/linux/seccomp.h
@@ -14,6 +14,9 @@
 #define SECCOMP_SET_MODE_STRICT	0
 #define SECCOMP_SET_MODE_FILTER	1
 
+/* Valid flags for SECCOMP_SET_MODE_FILTER */
+#define SECCOMP_FILTER_FLAG_TSYNC	1
+
 /*
  * All BPF programs must return a 32-bit value.
  * The bottom 16-bits are for optional return data.
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index 9065d2c79c56..74f460179171 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -26,6 +26,7 @@
 #ifdef CONFIG_SECCOMP_FILTER
 #include <asm/syscall.h>
 #include <linux/filter.h>
+#include <linux/pid.h>
 #include <linux/ptrace.h>
 #include <linux/security.h>
 #include <linux/tracehook.h>
@@ -225,6 +226,114 @@ static inline void seccomp_assign_mode(struct task_struct *task,
 }
 
 #ifdef CONFIG_SECCOMP_FILTER
+/* Returns 1 if the parent is an ancestor of the child. */
+static int is_ancestor(struct seccomp_filter *parent,
+		       struct seccomp_filter *child)
+{
+	/* NULL is the root ancestor. */
+	if (parent == NULL)
+		return 1;
+	for (; child; child = child->prev)
+		if (child == parent)
+			return 1;
+	return 0;
+}
+
+/**
+ * seccomp_can_sync_threads: checks if all threads can be synchronized
+ *
+ * Expects sighand and cred_guard_mutex locks to be held.
+ *
+ * Returns 0 on success, -ve on error, or the pid of a thread which was
+ * either not in the correct seccomp mode or it did not have an ancestral
+ * seccomp filter.
+ */
+static inline pid_t seccomp_can_sync_threads(void)
+{
+	struct task_struct *thread, *caller;
+
+	BUG_ON(!mutex_is_locked(&current->signal->cred_guard_mutex));
+	BUG_ON(!spin_is_locked(&current->sighand->siglock));
+
+	/* Validate all threads being eligible for synchronization. */
+	caller = current;
+	for_each_thread(caller, thread) {
+		pid_t failed;
+
+		/* Skip current, since it is initiating the sync. */
+		if (thread == caller)
+			continue;
+
+		if (thread->seccomp.mode == SECCOMP_MODE_DISABLED ||
+		    (thread->seccomp.mode == SECCOMP_MODE_FILTER &&
+		     is_ancestor(thread->seccomp.filter,
+				 caller->seccomp.filter)))
+			continue;
+
+		/* Return the first thread that cannot be synchronized. */
+		failed = task_pid_vnr(thread);
+		/* If the pid cannot be resolved, then return -ESRCH */
+		if (unlikely(WARN_ON(failed == 0)))
+			failed = -ESRCH;
+		return failed;
+	}
+
+	return 0;
+}
+
+/**
+ * seccomp_sync_threads: sets all threads to use current's filter
+ *
+ * Expects sighand and cred_guard_mutex locks to be held, and for
+ * seccomp_can_sync_threads() to have returned success already
+ * without dropping the locks.
+ *
+ */
+static inline void seccomp_sync_threads(void)
+{
+	struct task_struct *thread, *caller;
+
+	BUG_ON(!mutex_is_locked(&current->signal->cred_guard_mutex));
+	BUG_ON(!spin_is_locked(&current->sighand->siglock));
+
+	/* Synchronize all threads. */
+	caller = current;
+	for_each_thread(caller, thread) {
+		/* Skip current, since it needs no changes. */
+		if (thread == caller)
+			continue;
+
+		/* Get a task reference for the new leaf node. */
+		get_seccomp_filter(caller);
+		/*
+		 * Drop the task reference to the shared ancestor since
+		 * current's path will hold a reference.  (This also
+		 * allows a put before the assignment.)
+		 */
+		put_seccomp_filter(thread);
+		smp_store_release(&thread->seccomp.filter,
+				  caller->seccomp.filter);
+		/*
+		 * Opt the other thread into seccomp if needed.
+		 * As threads are considered to be trust-realm
+		 * equivalent (see ptrace_may_access), it is safe to
+		 * allow one thread to transition the other.
+		 */
+		if (thread->seccomp.mode == SECCOMP_MODE_DISABLED) {
+			/*
+			 * Don't let an unprivileged task work around
+			 * the no_new_privs restriction by creating
+			 * a thread that sets it up, enters seccomp,
+			 * then dies.
+			 */
+			if (task_no_new_privs(caller))
+				task_set_no_new_privs(thread);
+
+			seccomp_assign_mode(thread, SECCOMP_MODE_FILTER);
+		}
+	}
+}
+
 /**
  * seccomp_prepare_filter: Prepares a seccomp filter for use.
  * @fprog: BPF program to install
@@ -364,6 +473,15 @@ static long seccomp_attach_filter(unsigned int flags,
 	if (total_insns > MAX_INSNS_PER_PATH)
 		return -ENOMEM;
 
+	/* If thread sync has been requested, check that it is possible. */
+	if (flags & SECCOMP_FILTER_FLAG_TSYNC) {
+		int ret;
+
+		ret = seccomp_can_sync_threads();
+		if (ret)
+			return ret;
+	}
+
 	/*
 	 * If there is an existing filter, make it the prev and don't drop its
 	 * task reference.
@@ -371,6 +489,10 @@ static long seccomp_attach_filter(unsigned int flags,
 	filter->prev = current->seccomp.filter;
 	current->seccomp.filter = filter;
 
+	/* Now that the new filter is in place, synchronize to all threads. */
+	if (flags & SECCOMP_FILTER_FLAG_TSYNC)
+		seccomp_sync_threads();
+
 	return 0;
 }
 
@@ -590,7 +712,7 @@ static long seccomp_set_mode_filter(unsigned int flags,
 	long ret = -EINVAL;
 
 	/* Validate flags. */
-	if (flags != 0)
+	if (flags & ~SECCOMP_FILTER_FLAG_MASK)
 		return -EINVAL;
 
 	/* Prepare the new filter before holding any locks. */
@@ -598,6 +720,14 @@ static long seccomp_set_mode_filter(unsigned int flags,
 	if (IS_ERR(prepared))
 		return PTR_ERR(prepared);
 
+	/*
+	 * Make sure we cannot change seccomp or nnp state via TSYNC
+	 * while another thread is in the middle of calling exec.
+	 */
+	if (flags & SECCOMP_FILTER_FLAG_TSYNC &&
+	    mutex_lock_killable(&current->signal->cred_guard_mutex))
+		goto out_free;
+
 	spin_lock_irq(&current->sighand->siglock);
 
 	if (!seccomp_may_assign_mode(seccomp_mode))
@@ -612,6 +742,9 @@ static long seccomp_set_mode_filter(unsigned int flags,
 	seccomp_assign_mode(current, seccomp_mode);
 out:
 	spin_unlock_irq(&current->sighand->siglock);
+	if (flags & SECCOMP_FILTER_FLAG_TSYNC)
+		mutex_unlock(&current->signal->cred_guard_mutex);
+out_free:
 	seccomp_filter_free(prepared);
 	return ret;
 }
-- 
cgit v1.2.3


From b4e05923f9c5bb65ac82988d7b53cfd7425e6f36 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Sun, 20 Jul 2014 13:35:44 -0700
Subject: Input: add support for Wacom protocol 4 serial tablets

Recent version of xf86-input-wacom no longer support directly accessing
serial tablets. Instead xf86-input-wacom now expects all wacom tablets to
be driven by the kernel and to show up as evdev devices.

This has caused old serial Wacom tablets to stop working for people who still
have such tablets. Julian Squires has written a serio input driver to fix this:
https://github.com/tokenrove/wacom-serial-iv

This is a cleaned up version of this driver with improved Graphire support
(I own an old Graphire myself).

Signed-off-by: Julian Squires <julian@cipht.net>
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 MAINTAINERS                          |   7 +
 drivers/input/tablet/Kconfig         |  10 +
 drivers/input/tablet/Makefile        |   1 +
 drivers/input/tablet/wacom_serial4.c | 616 +++++++++++++++++++++++++++++++++++
 include/uapi/linux/serio.h           |   1 +
 5 files changed, 635 insertions(+)
 create mode 100644 drivers/input/tablet/wacom_serial4.c

(limited to 'include/uapi/linux')

diff --git a/MAINTAINERS b/MAINTAINERS
index 51ebb779c5f3..c68bd8b6f8e9 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -9708,6 +9708,13 @@ M:	Pierre Ossman <pierre@ossman.eu>
 S:	Maintained
 F:	drivers/mmc/host/wbsd.*
 
+WACOM PROTOCOL 4 SERIAL TABLETS
+M:	Julian Squires <julian@cipht.net>
+M:	Hans de Goede <hdegoede@redhat.com>
+L:	linux-input@vger.kernel.org
+S:	Maintained
+F:	drivers/input/tablet/wacom_serial4.c
+
 WATCHDOG DEVICE DRIVERS
 M:	Wim Van Sebroeck <wim@iguana.be>
 L:	linux-watchdog@vger.kernel.org
diff --git a/drivers/input/tablet/Kconfig b/drivers/input/tablet/Kconfig
index bed7cbf84cfd..a5121b09c63f 100644
--- a/drivers/input/tablet/Kconfig
+++ b/drivers/input/tablet/Kconfig
@@ -89,4 +89,14 @@ config TABLET_USB_WACOM
 	  To compile this driver as a module, choose M here: the
 	  module will be called wacom.
 
+config TABLET_SERIAL_WACOM4
+	tristate "Wacom protocol 4 serial tablet support"
+	select SERIO
+	help
+	  Say Y here if you want to use Wacom protocol 4 serial tablets.
+	  E.g. serial versions of the Cintiq, Graphire or Penpartner.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called wacom_serial4.
+
 endif
diff --git a/drivers/input/tablet/Makefile b/drivers/input/tablet/Makefile
index 3f6c25220638..4d9339fb3b63 100644
--- a/drivers/input/tablet/Makefile
+++ b/drivers/input/tablet/Makefile
@@ -11,3 +11,4 @@ obj-$(CONFIG_TABLET_USB_GTCO)	+= gtco.o
 obj-$(CONFIG_TABLET_USB_HANWANG) += hanwang.o
 obj-$(CONFIG_TABLET_USB_KBTAB)	+= kbtab.o
 obj-$(CONFIG_TABLET_USB_WACOM)	+= wacom.o
+obj-$(CONFIG_TABLET_SERIAL_WACOM4) += wacom_serial4.o
diff --git a/drivers/input/tablet/wacom_serial4.c b/drivers/input/tablet/wacom_serial4.c
new file mode 100644
index 000000000000..d3d251e27307
--- /dev/null
+++ b/drivers/input/tablet/wacom_serial4.c
@@ -0,0 +1,616 @@
+/*
+ * Wacom protocol 4 serial tablet driver
+ *
+ * Copyright 2014      Hans de Goede <hdegoede@redhat.com>
+ * Copyright 2011-2012 Julian Squires <julian@cipht.net>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version of 2 of the License, or (at your
+ * option) any later version. See the file COPYING in the main directory of
+ * this archive for more details.
+ *
+ * Many thanks to Bill Seremetis, without whom PenPartner support
+ * would not have been possible. Thanks to Patrick Mahoney.
+ *
+ * This driver was developed with reference to much code written by others,
+ * particularly:
+ *  - elo, gunze drivers by Vojtech Pavlik <vojtech@ucw.cz>;
+ *  - wacom_w8001 driver by Jaya Kumar <jayakumar.lkml@gmail.com>;
+ *  - the USB wacom input driver, credited to many people
+ *    (see drivers/input/tablet/wacom.h);
+ *  - new and old versions of linuxwacom / xf86-input-wacom credited to
+ *    Frederic Lepied, France. <Lepied@XFree86.org> and
+ *    Ping Cheng, Wacom. <pingc@wacom.com>;
+ *  - and xf86wacom.c (a presumably ancient version of the linuxwacom code),
+ *    by Frederic Lepied and Raph Levien <raph@gtk.org>.
+ *
+ * To do:
+ *  - support pad buttons; (requires access to a model with pad buttons)
+ *  - support (protocol 4-style) tilt (requires access to a > 1.4 rom model)
+ */
+
+/*
+ * Wacom serial protocol 4 documentation taken from linuxwacom-0.9.9 code,
+ * protocol 4 uses 7 or 9 byte of data in the following format:
+ *
+ *	Byte 1
+ *	bit 7  Sync bit always 1
+ *	bit 6  Pointing device detected
+ *	bit 5  Cursor = 0 / Stylus = 1
+ *	bit 4  Reserved
+ *	bit 3  1 if a button on the pointing device has been pressed
+ *	bit 2  P0 (optional)
+ *	bit 1  X15
+ *	bit 0  X14
+ *
+ *	Byte 2
+ *	bit 7  Always 0
+ *	bits 6-0 = X13 - X7
+ *
+ *	Byte 3
+ *	bit 7  Always 0
+ *	bits 6-0 = X6 - X0
+ *
+ *	Byte 4
+ *	bit 7  Always 0
+ *	bit 6  B3
+ *	bit 5  B2
+ *	bit 4  B1
+ *	bit 3  B0
+ *	bit 2  P1 (optional)
+ *	bit 1  Y15
+ *	bit 0  Y14
+ *
+ *	Byte 5
+ *	bit 7  Always 0
+ *	bits 6-0 = Y13 - Y7
+ *
+ *	Byte 6
+ *	bit 7  Always 0
+ *	bits 6-0 = Y6 - Y0
+ *
+ *	Byte 7
+ *	bit 7 Always 0
+ *	bit 6  Sign of pressure data; or wheel-rel for cursor tool
+ *	bit 5  P7; or REL1 for cursor tool
+ *	bit 4  P6; or REL0 for cursor tool
+ *	bit 3  P5
+ *	bit 2  P4
+ *	bit 1  P3
+ *	bit 0  P2
+ *
+ *	byte 8 and 9 are optional and present only
+ *	in tilt mode.
+ *
+ *	Byte 8
+ *	bit 7 Always 0
+ *	bit 6 Sign of tilt X
+ *	bit 5  Xt6
+ *	bit 4  Xt5
+ *	bit 3  Xt4
+ *	bit 2  Xt3
+ *	bit 1  Xt2
+ *	bit 0  Xt1
+ *
+ *	Byte 9
+ *	bit 7 Always 0
+ *	bit 6 Sign of tilt Y
+ *	bit 5  Yt6
+ *	bit 4  Yt5
+ *	bit 3  Yt4
+ *	bit 2  Yt3
+ *	bit 1  Yt2
+ *	bit 0  Yt1
+ */
+
+#include <linux/completion.h>
+#include <linux/init.h>
+#include <linux/input.h>
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/serio.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include "wacom_wac.h"
+
+MODULE_AUTHOR("Julian Squires <julian@cipht.net>, Hans de Goede <hdegoede@redhat.com>");
+MODULE_DESCRIPTION("Wacom protocol 4 serial tablet driver");
+MODULE_LICENSE("GPL");
+
+#define REQUEST_MODEL_AND_ROM_VERSION	"~#"
+#define REQUEST_MAX_COORDINATES		"~C\r"
+#define REQUEST_CONFIGURATION_STRING	"~R\r"
+#define REQUEST_RESET_TO_PROTOCOL_IV	"\r#"
+/*
+ * Note: sending "\r$\r" causes at least the Digitizer II to send
+ * packets in ASCII instead of binary.  "\r#" seems to undo that.
+ */
+
+#define COMMAND_START_SENDING_PACKETS		"ST\r"
+#define COMMAND_STOP_SENDING_PACKETS		"SP\r"
+#define COMMAND_MULTI_MODE_INPUT		"MU1\r"
+#define COMMAND_ORIGIN_IN_UPPER_LEFT		"OC1\r"
+#define COMMAND_ENABLE_ALL_MACRO_BUTTONS	"~M0\r"
+#define COMMAND_DISABLE_GROUP_1_MACRO_BUTTONS	"~M1\r"
+#define COMMAND_TRANSMIT_AT_MAX_RATE		"IT0\r"
+#define COMMAND_DISABLE_INCREMENTAL_MODE	"IN0\r"
+#define COMMAND_ENABLE_CONTINUOUS_MODE		"SR\r"
+#define COMMAND_ENABLE_PRESSURE_MODE		"PH1\r"
+#define COMMAND_Z_FILTER			"ZF1\r"
+
+/* Note that this is a protocol 4 packet without tilt information. */
+#define PACKET_LENGTH		7
+#define DATA_SIZE		32
+
+/* flags */
+#define F_COVERS_SCREEN		0x01
+#define F_HAS_STYLUS2		0x02
+#define F_HAS_SCROLLWHEEL	0x04
+
+enum { STYLUS = 1, ERASER, CURSOR };
+
+static const struct {
+	int device_id;
+	int input_id;
+} tools[] = {
+	{ 0, 0 },
+	{ STYLUS_DEVICE_ID, BTN_TOOL_PEN },
+	{ ERASER_DEVICE_ID, BTN_TOOL_RUBBER },
+	{ CURSOR_DEVICE_ID, BTN_TOOL_MOUSE },
+};
+
+struct wacom {
+	struct input_dev *dev;
+	struct completion cmd_done;
+	int result;
+	u8 expect;
+	u8 eraser_mask;
+	unsigned int extra_z_bits;
+	unsigned int flags;
+	unsigned int res_x, res_y;
+	unsigned int max_x, max_y;
+	unsigned int tool;
+	unsigned int idx;
+	u8 data[DATA_SIZE];
+	char phys[32];
+};
+
+enum {
+	MODEL_CINTIQ		= 0x504C, /* PL */
+	MODEL_CINTIQ2		= 0x4454, /* DT */
+	MODEL_DIGITIZER_II	= 0x5544, /* UD */
+	MODEL_GRAPHIRE		= 0x4554, /* ET */
+	MODEL_PENPARTNER	= 0x4354, /* CT */
+};
+
+static void wacom_handle_model_response(struct wacom *wacom)
+{
+	int major_v, minor_v, r = 0;
+	char *p;
+
+	p = strrchr(wacom->data, 'V');
+	if (p)
+		r = sscanf(p + 1, "%u.%u", &major_v, &minor_v);
+	if (r != 2)
+		major_v = minor_v = 0;
+
+	switch (wacom->data[2] << 8 | wacom->data[3]) {
+	case MODEL_CINTIQ:	/* UNTESTED */
+	case MODEL_CINTIQ2:
+		if ((wacom->data[2] << 8 | wacom->data[3]) == MODEL_CINTIQ) {
+			wacom->dev->name = "Wacom Cintiq";
+			wacom->dev->id.version = MODEL_CINTIQ;
+		} else {
+			wacom->dev->name = "Wacom Cintiq II";
+			wacom->dev->id.version = MODEL_CINTIQ2;
+		}
+		wacom->res_x = 508;
+		wacom->res_y = 508;
+
+		switch (wacom->data[5] << 8 | wacom->data[6]) {
+		case 0x3731: /* PL-710 */
+			wacom->res_x = 2540;
+			wacom->res_y = 2540;
+			/* fall through */
+		case 0x3535: /* PL-550 */
+		case 0x3830: /* PL-800 */
+			wacom->extra_z_bits = 2;
+		}
+
+		wacom->flags = F_COVERS_SCREEN;
+		break;
+
+	case MODEL_PENPARTNER:
+		wacom->dev->name = "Wacom Penpartner";
+		wacom->dev->id.version = MODEL_PENPARTNER;
+		wacom->res_x = 1000;
+		wacom->res_y = 1000;
+		break;
+
+	case MODEL_GRAPHIRE:
+		wacom->dev->name = "Wacom Graphire";
+		wacom->dev->id.version = MODEL_GRAPHIRE;
+		wacom->res_x = 1016;
+		wacom->res_y = 1016;
+		wacom->max_x = 5103;
+		wacom->max_y = 3711;
+		wacom->extra_z_bits = 2;
+		wacom->eraser_mask = 0x08;
+		wacom->flags = F_HAS_STYLUS2 | F_HAS_SCROLLWHEEL;
+		break;
+
+	case MODEL_DIGITIZER_II:
+		wacom->dev->name = "Wacom Digitizer II";
+		wacom->dev->id.version = MODEL_DIGITIZER_II;
+		if (major_v == 1 && minor_v <= 2)
+			wacom->extra_z_bits = 0; /* UNTESTED */
+		break;
+
+	default:
+		dev_err(&wacom->dev->dev, "Unsupported Wacom model %s\n",
+			wacom->data);
+		wacom->result = -ENODEV;
+		return;
+	}
+
+	dev_info(&wacom->dev->dev, "%s tablet, version %u.%u\n",
+		 wacom->dev->name, major_v, minor_v);
+}
+
+static void wacom_handle_configuration_response(struct wacom *wacom)
+{
+	int r, skip;
+
+	dev_dbg(&wacom->dev->dev, "Configuration string: %s\n", wacom->data);
+	r = sscanf(wacom->data, "~R%x,%u,%u,%u,%u", &skip, &skip, &skip,
+		   &wacom->res_x, &wacom->res_y);
+	if (r != 5)
+		dev_warn(&wacom->dev->dev, "could not get resolution\n");
+}
+
+static void wacom_handle_coordinates_response(struct wacom *wacom)
+{
+	int r;
+
+	dev_dbg(&wacom->dev->dev, "Coordinates string: %s\n", wacom->data);
+	r = sscanf(wacom->data, "~C%u,%u", &wacom->max_x, &wacom->max_y);
+	if (r != 2)
+		dev_warn(&wacom->dev->dev, "could not get max coordinates\n");
+}
+
+static void wacom_handle_response(struct wacom *wacom)
+{
+	if (wacom->data[0] != '~' || wacom->data[1] != wacom->expect) {
+		dev_err(&wacom->dev->dev,
+			"Wacom got an unexpected response: %s\n", wacom->data);
+		wacom->result = -EIO;
+	} else {
+		wacom->result = 0;
+
+		switch (wacom->data[1]) {
+		case '#':
+			wacom_handle_model_response(wacom);
+			break;
+		case 'R':
+			wacom_handle_configuration_response(wacom);
+			break;
+		case 'C':
+			wacom_handle_coordinates_response(wacom);
+			break;
+		}
+	}
+
+	complete(&wacom->cmd_done);
+}
+
+static void wacom_handle_packet(struct wacom *wacom)
+{
+	u8 in_proximity_p, stylus_p, button;
+	unsigned int tool;
+	int x, y, z;
+
+	in_proximity_p = wacom->data[0] & 0x40;
+	stylus_p = wacom->data[0] & 0x20;
+	button = (wacom->data[3] & 0x78) >> 3;
+	x = (wacom->data[0] & 3) << 14 | wacom->data[1]<<7 | wacom->data[2];
+	y = (wacom->data[3] & 3) << 14 | wacom->data[4]<<7 | wacom->data[5];
+
+	if (in_proximity_p && stylus_p) {
+		z = wacom->data[6] & 0x7f;
+		if (wacom->extra_z_bits >= 1)
+			z = z << 1 | (wacom->data[3] & 0x4) >> 2;
+		if (wacom->extra_z_bits > 1)
+			z = z << 1 | (wacom->data[0] & 0x4) >> 2;
+		z = z ^ (0x40 << wacom->extra_z_bits);
+	} else {
+		z = -1;
+	}
+
+	if (stylus_p)
+		tool = (button & wacom->eraser_mask) ? ERASER : STYLUS;
+	else
+		tool = CURSOR;
+
+	if (tool != wacom->tool && wacom->tool != 0) {
+		input_report_key(wacom->dev, tools[wacom->tool].input_id, 0);
+		input_sync(wacom->dev);
+	}
+	wacom->tool = tool;
+
+	input_report_key(wacom->dev, tools[tool].input_id, in_proximity_p);
+	input_report_abs(wacom->dev, ABS_MISC,
+			 in_proximity_p ? tools[tool].device_id : 0);
+	input_report_abs(wacom->dev, ABS_X, x);
+	input_report_abs(wacom->dev, ABS_Y, y);
+	input_report_abs(wacom->dev, ABS_PRESSURE, z);
+	if (stylus_p) {
+		input_report_key(wacom->dev, BTN_TOUCH, button & 1);
+		input_report_key(wacom->dev, BTN_STYLUS, button & 2);
+		input_report_key(wacom->dev, BTN_STYLUS2, button & 4);
+	} else {
+		input_report_key(wacom->dev, BTN_LEFT, button & 1);
+		input_report_key(wacom->dev, BTN_RIGHT, button & 2);
+		input_report_key(wacom->dev, BTN_MIDDLE, button & 4);
+		/* handle relative wheel for non-stylus device */
+		z = (wacom->data[6] & 0x30) >> 4;
+		if (wacom->data[6] & 0x40)
+			z = -z;
+		input_report_rel(wacom->dev, REL_WHEEL, z);
+	}
+	input_sync(wacom->dev);
+}
+
+static void wacom_clear_data_buf(struct wacom *wacom)
+{
+	memset(wacom->data, 0, DATA_SIZE);
+	wacom->idx = 0;
+}
+
+static irqreturn_t wacom_interrupt(struct serio *serio, unsigned char data,
+				   unsigned int flags)
+{
+	struct wacom *wacom = serio_get_drvdata(serio);
+
+	if (data & 0x80)
+		wacom->idx = 0;
+
+	/*
+	 * We're either expecting a carriage return-terminated ASCII
+	 * response string, or a seven-byte packet with the MSB set on
+	 * the first byte.
+	 *
+	 * Note however that some tablets (the PenPartner, for
+	 * example) don't send a carriage return at the end of a
+	 * command.  We handle these by waiting for timeout.
+	 */
+	if (data == '\r' && !(wacom->data[0] & 0x80)) {
+		wacom_handle_response(wacom);
+		wacom_clear_data_buf(wacom);
+		return IRQ_HANDLED;
+	}
+
+	/* Leave place for 0 termination */
+	if (wacom->idx > (DATA_SIZE - 2)) {
+		dev_dbg(&wacom->dev->dev,
+			"throwing away %d bytes of garbage\n", wacom->idx);
+		wacom_clear_data_buf(wacom);
+	}
+	wacom->data[wacom->idx++] = data;
+
+	if (wacom->idx == PACKET_LENGTH && (wacom->data[0] & 0x80)) {
+		wacom_handle_packet(wacom);
+		wacom_clear_data_buf(wacom);
+	}
+
+	return IRQ_HANDLED;
+}
+
+static void wacom_disconnect(struct serio *serio)
+{
+	struct wacom *wacom = serio_get_drvdata(serio);
+
+	serio_close(serio);
+	serio_set_drvdata(serio, NULL);
+	input_unregister_device(wacom->dev);
+	kfree(wacom);
+}
+
+static int wacom_send(struct serio *serio, const u8 *command)
+{
+	int err = 0;
+
+	for (; !err && *command; command++)
+		err = serio_write(serio, *command);
+
+	return err;
+}
+
+static int wacom_send_setup_string(struct wacom *wacom, struct serio *serio)
+{
+	const u8 *cmd;
+
+	switch (wacom->dev->id.version) {
+	case MODEL_CINTIQ:	/* UNTESTED */
+		cmd = COMMAND_ORIGIN_IN_UPPER_LEFT
+			COMMAND_TRANSMIT_AT_MAX_RATE
+			COMMAND_ENABLE_CONTINUOUS_MODE
+			COMMAND_START_SENDING_PACKETS;
+		break;
+
+	case MODEL_PENPARTNER:
+		cmd = COMMAND_ENABLE_PRESSURE_MODE
+			COMMAND_START_SENDING_PACKETS;
+		break;
+
+	default:
+		cmd = COMMAND_MULTI_MODE_INPUT
+			COMMAND_ORIGIN_IN_UPPER_LEFT
+			COMMAND_ENABLE_ALL_MACRO_BUTTONS
+			COMMAND_DISABLE_GROUP_1_MACRO_BUTTONS
+			COMMAND_TRANSMIT_AT_MAX_RATE
+			COMMAND_DISABLE_INCREMENTAL_MODE
+			COMMAND_ENABLE_CONTINUOUS_MODE
+			COMMAND_Z_FILTER
+			COMMAND_START_SENDING_PACKETS;
+		break;
+	}
+
+	return wacom_send(serio, cmd);
+}
+
+static int wacom_send_and_wait(struct wacom *wacom, struct serio *serio,
+			       const u8 *cmd, const char *desc)
+{
+	int err;
+	unsigned long u;
+
+	wacom->expect = cmd[1];
+	init_completion(&wacom->cmd_done);
+
+	err = wacom_send(serio, cmd);
+	if (err)
+		return err;
+
+	u = wait_for_completion_timeout(&wacom->cmd_done, HZ);
+	if (u == 0) {
+		/* Timeout, process what we've received. */
+		wacom_handle_response(wacom);
+	}
+
+	wacom->expect = 0;
+	return wacom->result;
+}
+
+static int wacom_setup(struct wacom *wacom, struct serio *serio)
+{
+	int err;
+
+	/* Note that setting the link speed is the job of inputattach.
+	 * We assume that reset negotiation has already happened,
+	 * here. */
+	err = wacom_send_and_wait(wacom, serio, REQUEST_MODEL_AND_ROM_VERSION,
+				  "model and version");
+	if (err)
+		return err;
+
+	if (!(wacom->res_x && wacom->res_y)) {
+		err = wacom_send_and_wait(wacom, serio,
+					  REQUEST_CONFIGURATION_STRING,
+					  "configuration string");
+		if (err)
+			return err;
+	}
+
+	if (!(wacom->max_x && wacom->max_y)) {
+		err = wacom_send_and_wait(wacom, serio,
+					  REQUEST_MAX_COORDINATES,
+					  "coordinates string");
+		if (err)
+			return err;
+	}
+
+	return wacom_send_setup_string(wacom, serio);
+}
+
+static int wacom_connect(struct serio *serio, struct serio_driver *drv)
+{
+	struct wacom *wacom;
+	struct input_dev *input_dev;
+	int err = -ENOMEM;
+
+	wacom = kzalloc(sizeof(struct wacom), GFP_KERNEL);
+	input_dev = input_allocate_device();
+	if (!wacom || !input_dev)
+		goto free_device;
+
+	wacom->dev = input_dev;
+	wacom->extra_z_bits = 1;
+	wacom->eraser_mask = 0x04;
+	wacom->tool = wacom->idx = 0;
+	snprintf(wacom->phys, sizeof(wacom->phys), "%s/input0", serio->phys);
+	input_dev->phys = wacom->phys;
+	input_dev->id.bustype = BUS_RS232;
+	input_dev->id.vendor  = SERIO_WACOM_IV;
+	input_dev->id.product = serio->id.extra;
+	input_dev->dev.parent = &serio->dev;
+
+	input_dev->evbit[0] =
+		BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS) | BIT_MASK(EV_REL);
+	set_bit(ABS_MISC, input_dev->absbit);
+	set_bit(BTN_TOOL_PEN, input_dev->keybit);
+	set_bit(BTN_TOOL_RUBBER, input_dev->keybit);
+	set_bit(BTN_TOOL_MOUSE, input_dev->keybit);
+	set_bit(BTN_TOUCH, input_dev->keybit);
+	set_bit(BTN_STYLUS, input_dev->keybit);
+	set_bit(BTN_LEFT, input_dev->keybit);
+	set_bit(BTN_RIGHT, input_dev->keybit);
+	set_bit(BTN_MIDDLE, input_dev->keybit);
+
+	serio_set_drvdata(serio, wacom);
+
+	err = serio_open(serio, drv);
+	if (err)
+		goto free_device;
+
+	err = wacom_setup(wacom, serio);
+	if (err)
+		goto close_serio;
+
+	set_bit(INPUT_PROP_DIRECT, input_dev->propbit);
+	if (!(wacom->flags & F_COVERS_SCREEN))
+		__set_bit(INPUT_PROP_POINTER, input_dev->propbit);
+
+	if (wacom->flags & F_HAS_STYLUS2)
+		__set_bit(BTN_STYLUS2, input_dev->keybit);
+
+	if (wacom->flags & F_HAS_SCROLLWHEEL)
+		__set_bit(REL_WHEEL, input_dev->relbit);
+
+	input_abs_set_res(wacom->dev, ABS_X, wacom->res_x);
+	input_abs_set_res(wacom->dev, ABS_Y, wacom->res_y);
+	input_set_abs_params(wacom->dev, ABS_X, 0, wacom->max_x, 0, 0);
+	input_set_abs_params(wacom->dev, ABS_Y, 0, wacom->max_y, 0, 0);
+	input_set_abs_params(wacom->dev, ABS_PRESSURE, -1,
+			     (1 << (7 + wacom->extra_z_bits)) - 1, 0, 0);
+
+	err = input_register_device(wacom->dev);
+	if (err)
+		goto close_serio;
+
+	return 0;
+
+close_serio:
+	serio_close(serio);
+free_device:
+	serio_set_drvdata(serio, NULL);
+	input_free_device(input_dev);
+	kfree(wacom);
+	return err;
+}
+
+static struct serio_device_id wacom_serio_ids[] = {
+	{
+		.type	= SERIO_RS232,
+		.proto	= SERIO_WACOM_IV,
+		.id	= SERIO_ANY,
+		.extra	= SERIO_ANY,
+	},
+	{ 0 }
+};
+
+MODULE_DEVICE_TABLE(serio, wacom_serio_ids);
+
+static struct serio_driver wacom_drv = {
+	.driver		= {
+		.name	= "wacom_serial4",
+	},
+	.description	= "Wacom protocol 4 serial tablet driver",
+	.id_table	= wacom_serio_ids,
+	.interrupt	= wacom_interrupt,
+	.connect	= wacom_connect,
+	.disconnect	= wacom_disconnect,
+};
+
+module_serio_driver(wacom_drv);
diff --git a/include/uapi/linux/serio.h b/include/uapi/linux/serio.h
index 9f53fa7fc132..becdd78295cc 100644
--- a/include/uapi/linux/serio.h
+++ b/include/uapi/linux/serio.h
@@ -76,5 +76,6 @@
 #define SERIO_HAMPSHIRE	0x3b
 #define SERIO_PS2MULT	0x3c
 #define SERIO_TSC40	0x3d
+#define SERIO_WACOM_IV	0x3e
 
 #endif /* _UAPI_SERIO_H */
-- 
cgit v1.2.3


From ba4e9a61ad54c438d4c7b655e94e31f23a6fe13f Mon Sep 17 00:00:00 2001
From: David Herrmann <dh.herrmann@gmail.com>
Date: Sun, 20 Jul 2014 17:27:09 -0700
Subject: Input: uinput - add UI_GET_VERSION ioctl

This ioctl is the counterpart to EVIOCGVERSION and returns the
uinput-version the kernel was compiled with.

Reviewed-by: Peter Hutterer <peter.hutterer@who-t.net>
Signed-off-by: David Herrmann <dh.herrmann@gmail.com>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/misc/uinput.c | 6 ++++++
 include/uapi/linux/uinput.h | 9 +++++++++
 2 files changed, 15 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/drivers/input/misc/uinput.c b/drivers/input/misc/uinput.c
index 883f045f37df..421e29e4cd81 100644
--- a/drivers/input/misc/uinput.c
+++ b/drivers/input/misc/uinput.c
@@ -723,6 +723,12 @@ static long uinput_ioctl_handler(struct file *file, unsigned int cmd,
 	}
 
 	switch (cmd) {
+		case UI_GET_VERSION:
+			if (put_user(UINPUT_VERSION,
+				     (unsigned int __user *)p))
+				retval = -EFAULT;
+			goto out;
+
 		case UI_DEV_CREATE:
 			retval = uinput_create_device(udev);
 			goto out;
diff --git a/include/uapi/linux/uinput.h b/include/uapi/linux/uinput.h
index 0389b489bbba..baeab83deb64 100644
--- a/include/uapi/linux/uinput.h
+++ b/include/uapi/linux/uinput.h
@@ -84,6 +84,15 @@ struct uinput_ff_erase {
  */
 #define UI_GET_SYSNAME(len)	_IOC(_IOC_READ, UINPUT_IOCTL_BASE, 300, len)
 
+/**
+ * UI_GET_VERSION - Return version of uinput protocol
+ *
+ * This writes uinput protocol version implemented by the kernel into
+ * the integer pointed to by the ioctl argument. The protocol version
+ * is hard-coded in the kernel and is independent of the uinput device.
+ */
+#define UI_GET_VERSION		_IOR(UINPUT_IOCTL_BASE, 301, unsigned int)
+
 /*
  * To write a force-feedback-capable driver, the upload_effect
  * and erase_effect callbacks in input_dev must be implemented.
-- 
cgit v1.2.3


From 9e9e3927ea76c95873f153c19b7c0d7f319dabc4 Mon Sep 17 00:00:00 2001
From: Antti Palosaari <crope@iki.fi>
Date: Fri, 27 Jun 2014 17:42:18 -0300
Subject: [media] v4l: uapi: add SDR format RU12LE

V4L2_SDR_FMT_RU12LE - Real unsigned 12-bit little endian sample
inside 16-bit (2 byte). V4L2 FourCC: RU12.

Signed-off-by: Antti Palosaari <crope@iki.fi>
Signed-off-by: Mauro Carvalho Chehab <m.chehab@samsung.com>
---
 include/uapi/linux/videodev2.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h
index 1f1a65c1fe8e..e8f01cd8a17d 100644
--- a/include/uapi/linux/videodev2.h
+++ b/include/uapi/linux/videodev2.h
@@ -453,6 +453,7 @@ struct v4l2_pix_format {
 /* SDR formats - used only for Software Defined Radio devices */
 #define V4L2_SDR_FMT_CU8          v4l2_fourcc('C', 'U', '0', '8') /* IQ u8 */
 #define V4L2_SDR_FMT_CU16LE       v4l2_fourcc('C', 'U', '1', '6') /* IQ u16le */
+#define V4L2_SDR_FMT_RU12LE       v4l2_fourcc('R', 'U', '1', '2') /* real u12le */
 
 /* priv field value to indicates that subsequent fields are valid. */
 #define V4L2_PIX_FMT_PRIV_MAGIC		0xfeedcafe
-- 
cgit v1.2.3


From c0c03886763f4d5ccfc5d854fd8dfe77814753be Mon Sep 17 00:00:00 2001
From: Antti Palosaari <crope@iki.fi>
Date: Fri, 11 Jul 2014 13:25:14 -0300
Subject: [media] v4l: uapi: add SDR format CS8

V4L2_SDR_FMT_CS8 - Complex signed 8-bit IQ sample

Signed-off-by: Antti Palosaari <crope@iki.fi>
Signed-off-by: Mauro Carvalho Chehab <m.chehab@samsung.com>
---
 include/uapi/linux/videodev2.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h
index e8f01cd8a17d..e943e4cc8df6 100644
--- a/include/uapi/linux/videodev2.h
+++ b/include/uapi/linux/videodev2.h
@@ -453,6 +453,7 @@ struct v4l2_pix_format {
 /* SDR formats - used only for Software Defined Radio devices */
 #define V4L2_SDR_FMT_CU8          v4l2_fourcc('C', 'U', '0', '8') /* IQ u8 */
 #define V4L2_SDR_FMT_CU16LE       v4l2_fourcc('C', 'U', '1', '6') /* IQ u16le */
+#define V4L2_SDR_FMT_CS8          v4l2_fourcc('C', 'S', '0', '8') /* complex s8 */
 #define V4L2_SDR_FMT_RU12LE       v4l2_fourcc('R', 'U', '1', '2') /* real u12le */
 
 /* priv field value to indicates that subsequent fields are valid. */
-- 
cgit v1.2.3


From 5c5be570043f864b8373ed26fd68698fe352a0ef Mon Sep 17 00:00:00 2001
From: Antti Palosaari <crope@iki.fi>
Date: Fri, 11 Jul 2014 16:35:47 -0300
Subject: [media] v4l: uapi: add SDR format CS14

V4L2_SDR_FMT_CS14LE - Complex signed 14-bit IQ sample

Signed-off-by: Antti Palosaari <crope@iki.fi>
Signed-off-by: Mauro Carvalho Chehab <m.chehab@samsung.com>
---
 include/uapi/linux/videodev2.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h
index e943e4cc8df6..6da302d46145 100644
--- a/include/uapi/linux/videodev2.h
+++ b/include/uapi/linux/videodev2.h
@@ -454,6 +454,7 @@ struct v4l2_pix_format {
 #define V4L2_SDR_FMT_CU8          v4l2_fourcc('C', 'U', '0', '8') /* IQ u8 */
 #define V4L2_SDR_FMT_CU16LE       v4l2_fourcc('C', 'U', '1', '6') /* IQ u16le */
 #define V4L2_SDR_FMT_CS8          v4l2_fourcc('C', 'S', '0', '8') /* complex s8 */
+#define V4L2_SDR_FMT_CS14LE       v4l2_fourcc('C', 'S', '1', '4') /* complex s14le */
 #define V4L2_SDR_FMT_RU12LE       v4l2_fourcc('R', 'U', '1', '2') /* real u12le */
 
 /* priv field value to indicates that subsequent fields are valid. */
-- 
cgit v1.2.3


From aaa968b636550042023bffa075adcdb01e6d257c Mon Sep 17 00:00:00 2001
From: Antti Palosaari <crope@iki.fi>
Date: Fri, 18 Jul 2014 15:28:24 -0300
Subject: [media] v4l: videodev2: add buffer size to SDR format

Add buffer size field to struct v4l2_sdr_format. It is used for
negotiate streaming buffer size between application and driver.

Signed-off-by: Antti Palosaari <crope@iki.fi>
Acked-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <m.chehab@samsung.com>
---
 include/uapi/linux/videodev2.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h
index 6da302d46145..8fc4822c7883 100644
--- a/include/uapi/linux/videodev2.h
+++ b/include/uapi/linux/videodev2.h
@@ -1783,10 +1783,12 @@ struct v4l2_pix_format_mplane {
 /**
  * struct v4l2_sdr_format - SDR format definition
  * @pixelformat:	little endian four character code (fourcc)
+ * @buffersize:		maximum size in bytes required for data
  */
 struct v4l2_sdr_format {
 	__u32				pixelformat;
-	__u8				reserved[28];
+	__u32				buffersize;
+	__u8				reserved[24];
 } __attribute__ ((packed));
 
 /**
-- 
cgit v1.2.3


From 273886b4f71a1a960a3625833df2f037efffab45 Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hans.verkuil@cisco.com>
Date: Fri, 18 Jul 2014 02:58:41 -0300
Subject: [media] videodev2.h: add defines for the VBI field start lines

While working with raw and sliced VBI support in several applications
I noticed that you really need to know the start linenumbers for
each video field in order to correctly convert the start line numbers
reported by v4l2_vbi_format to the line numbers used in v4l2_sliced_vbi_format.

This patch adds four defines that specify the start lines for each
field for both 525 and 625 line standards.

Signed-off-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <m.chehab@samsung.com>
---
 include/uapi/linux/videodev2.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h
index 8fc4822c7883..5fd42027877a 100644
--- a/include/uapi/linux/videodev2.h
+++ b/include/uapi/linux/videodev2.h
@@ -1642,6 +1642,12 @@ struct v4l2_vbi_format {
 #define V4L2_VBI_UNSYNC		(1 << 0)
 #define V4L2_VBI_INTERLACED	(1 << 1)
 
+/* ITU-R start lines for each field */
+#define V4L2_VBI_ITU_525_F1_START (1)
+#define V4L2_VBI_ITU_525_F2_START (264)
+#define V4L2_VBI_ITU_625_F1_START (1)
+#define V4L2_VBI_ITU_625_F2_START (314)
+
 /* Sliced VBI
  *
  *    This implements is a proposal V4L2 API to allow SLICED VBI
-- 
cgit v1.2.3


From d7afaec0b564f0609e116f562983b8e72fc3e9c9 Mon Sep 17 00:00:00 2001
From: Andrew Gallagher <andrewjcg@fb.com>
Date: Tue, 22 Jul 2014 16:37:43 +0200
Subject: fuse: add FUSE_NO_OPEN_SUPPORT flag to INIT

Here some additional changes to set a capability flag so that clients can
detect when it's appropriate to return -ENOSYS from open.

This amends the following commit introduced in 3.14:

  7678ac50615d  fuse: support clients that don't implement 'open'

However we can only add the flag to 3.15 and later since there was no
protocol version update in 3.14.

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
Cc: <stable@vger.kernel.org> # v3.15+
---
 fs/fuse/inode.c           | 2 +-
 include/uapi/linux/fuse.h | 3 +++
 2 files changed, 4 insertions(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 5ca874f2415b..03246cd9d47a 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -935,7 +935,7 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req)
 		FUSE_SPLICE_WRITE | FUSE_SPLICE_MOVE | FUSE_SPLICE_READ |
 		FUSE_FLOCK_LOCKS | FUSE_IOCTL_DIR | FUSE_AUTO_INVAL_DATA |
 		FUSE_DO_READDIRPLUS | FUSE_READDIRPLUS_AUTO | FUSE_ASYNC_DIO |
-		FUSE_WRITEBACK_CACHE;
+		FUSE_WRITEBACK_CACHE | FUSE_NO_OPEN_SUPPORT;
 	req->in.h.opcode = FUSE_INIT;
 	req->in.numargs = 1;
 	req->in.args[0].size = sizeof(*arg);
diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h
index 40b5ca8a1b1f..25084a052a1e 100644
--- a/include/uapi/linux/fuse.h
+++ b/include/uapi/linux/fuse.h
@@ -101,6 +101,7 @@
  *  - add FATTR_CTIME
  *  - add ctime and ctimensec to fuse_setattr_in
  *  - add FUSE_RENAME2 request
+ *  - add FUSE_NO_OPEN_SUPPORT flag
  */
 
 #ifndef _LINUX_FUSE_H
@@ -229,6 +230,7 @@ struct fuse_file_lock {
  * FUSE_READDIRPLUS_AUTO: adaptive readdirplus
  * FUSE_ASYNC_DIO: asynchronous direct I/O submission
  * FUSE_WRITEBACK_CACHE: use writeback cache for buffered writes
+ * FUSE_NO_OPEN_SUPPORT: kernel supports zero-message opens
  */
 #define FUSE_ASYNC_READ		(1 << 0)
 #define FUSE_POSIX_LOCKS	(1 << 1)
@@ -247,6 +249,7 @@ struct fuse_file_lock {
 #define FUSE_READDIRPLUS_AUTO	(1 << 14)
 #define FUSE_ASYNC_DIO		(1 << 15)
 #define FUSE_WRITEBACK_CACHE	(1 << 16)
+#define FUSE_NO_OPEN_SUPPORT	(1 << 17)
 
 /**
  * CUSE INIT request/reply flags
-- 
cgit v1.2.3


From 5cd667b0a4567048bb555927d6ee564f4e5620a9 Mon Sep 17 00:00:00 2001
From: Alex Wang <alexw@nicira.com>
Date: Thu, 17 Jul 2014 15:14:13 -0700
Subject: openvswitch: Allow each vport to have an array of 'port_id's.

In order to allow handlers directly read upcalls from datapath,
we need to support per-handler netlink socket for each vport in
datapath.  This commit makes this happen.  Also, it is guaranteed
to be backward compatible with previous branch.

Signed-off-by: Alex Wang <alexw@nicira.com>
Acked-by: Thomas Graf <tgraf@redhat.com>
Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
---
 include/uapi/linux/openvswitch.h |  13 +++--
 net/openvswitch/datapath.c       |  23 ++++++---
 net/openvswitch/vport.c          | 101 ++++++++++++++++++++++++++++++++++++++-
 net/openvswitch/vport.h          |  27 +++++++++--
 4 files changed, 148 insertions(+), 16 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h
index 0b979ee4bfc0..a794d1dd7b40 100644
--- a/include/uapi/linux/openvswitch.h
+++ b/include/uapi/linux/openvswitch.h
@@ -118,6 +118,9 @@ struct ovs_vport_stats {
 /* Allow last Netlink attribute to be unaligned */
 #define OVS_DP_F_UNALIGNED	(1 << 0)
 
+/* Allow datapath to associate multiple Netlink PIDs to each vport */
+#define OVS_DP_F_VPORT_PIDS	(1 << 1)
+
 /* Fixed logical ports. */
 #define OVSP_LOCAL      ((__u32)0)
 
@@ -203,9 +206,10 @@ enum ovs_vport_type {
  * this is the name of the network device.  Maximum length %IFNAMSIZ-1 bytes
  * plus a null terminator.
  * @OVS_VPORT_ATTR_OPTIONS: Vport-specific configuration information.
- * @OVS_VPORT_ATTR_UPCALL_PID: The Netlink socket in userspace that
- * OVS_PACKET_CMD_MISS upcalls will be directed to for packets received on
- * this port.  A value of zero indicates that upcalls should not be sent.
+ * @OVS_VPORT_ATTR_UPCALL_PID: The array of Netlink socket pids in userspace
+ * among which OVS_PACKET_CMD_MISS upcalls will be distributed for packets
+ * received on this port.  If this is a single-element array of value 0,
+ * upcalls should not be sent.
  * @OVS_VPORT_ATTR_STATS: A &struct ovs_vport_stats giving statistics for
  * packets sent or received through the vport.
  *
@@ -228,7 +232,8 @@ enum ovs_vport_attr {
 	OVS_VPORT_ATTR_TYPE,	/* u32 OVS_VPORT_TYPE_* constant. */
 	OVS_VPORT_ATTR_NAME,	/* string name, up to IFNAMSIZ bytes long */
 	OVS_VPORT_ATTR_OPTIONS, /* nested attributes, varies by vport type */
-	OVS_VPORT_ATTR_UPCALL_PID, /* u32 Netlink PID to receive upcalls */
+	OVS_VPORT_ATTR_UPCALL_PID, /* array of u32 Netlink socket PIDs for */
+				/* receiving upcalls */
 	OVS_VPORT_ATTR_STATS,	/* struct ovs_vport_stats */
 	__OVS_VPORT_ATTR_MAX
 };
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 20f59b62721a..65a8e5c089e4 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -266,7 +266,7 @@ void ovs_dp_process_received_packet(struct vport *p, struct sk_buff *skb)
 		upcall.cmd = OVS_PACKET_CMD_MISS;
 		upcall.key = &key;
 		upcall.userdata = NULL;
-		upcall.portid = p->upcall_portid;
+		upcall.portid = ovs_vport_find_upcall_portid(p, skb);
 		ovs_dp_upcall(dp, skb, &upcall);
 		consume_skb(skb);
 		stats_counter = &stats->n_missed;
@@ -1373,7 +1373,7 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
 	parms.options = NULL;
 	parms.dp = dp;
 	parms.port_no = OVSP_LOCAL;
-	parms.upcall_portid = nla_get_u32(a[OVS_DP_ATTR_UPCALL_PID]);
+	parms.upcall_portids = a[OVS_DP_ATTR_UPCALL_PID];
 
 	ovs_dp_change(dp, a);
 
@@ -1632,8 +1632,8 @@ static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
 
 	if (nla_put_u32(skb, OVS_VPORT_ATTR_PORT_NO, vport->port_no) ||
 	    nla_put_u32(skb, OVS_VPORT_ATTR_TYPE, vport->ops->type) ||
-	    nla_put_string(skb, OVS_VPORT_ATTR_NAME, vport->ops->get_name(vport)) ||
-	    nla_put_u32(skb, OVS_VPORT_ATTR_UPCALL_PID, vport->upcall_portid))
+	    nla_put_string(skb, OVS_VPORT_ATTR_NAME,
+			   vport->ops->get_name(vport)))
 		goto nla_put_failure;
 
 	ovs_vport_get_stats(vport, &vport_stats);
@@ -1641,6 +1641,9 @@ static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
 		    &vport_stats))
 		goto nla_put_failure;
 
+	if (ovs_vport_get_upcall_portids(vport, skb))
+		goto nla_put_failure;
+
 	err = ovs_vport_get_options(vport, skb);
 	if (err == -EMSGSIZE)
 		goto error;
@@ -1762,7 +1765,7 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
 	parms.options = a[OVS_VPORT_ATTR_OPTIONS];
 	parms.dp = dp;
 	parms.port_no = port_no;
-	parms.upcall_portid = nla_get_u32(a[OVS_VPORT_ATTR_UPCALL_PID]);
+	parms.upcall_portids = a[OVS_VPORT_ATTR_UPCALL_PID];
 
 	vport = new_vport(&parms);
 	err = PTR_ERR(vport);
@@ -1812,8 +1815,14 @@ static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info)
 			goto exit_unlock_free;
 	}
 
-	if (a[OVS_VPORT_ATTR_UPCALL_PID])
-		vport->upcall_portid = nla_get_u32(a[OVS_VPORT_ATTR_UPCALL_PID]);
+
+	if (a[OVS_VPORT_ATTR_UPCALL_PID]) {
+		struct nlattr *ids = a[OVS_VPORT_ATTR_UPCALL_PID];
+
+		err = ovs_vport_set_upcall_portids(vport, ids);
+		if (err)
+			goto exit_unlock_free;
+	}
 
 	err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid,
 				      info->snd_seq, 0, OVS_VPORT_CMD_NEW);
diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c
index 42c0f4a0b78c..702fb21bfe15 100644
--- a/net/openvswitch/vport.c
+++ b/net/openvswitch/vport.c
@@ -134,10 +134,12 @@ struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *ops,
 
 	vport->dp = parms->dp;
 	vport->port_no = parms->port_no;
-	vport->upcall_portid = parms->upcall_portid;
 	vport->ops = ops;
 	INIT_HLIST_NODE(&vport->dp_hash_node);
 
+	if (ovs_vport_set_upcall_portids(vport, parms->upcall_portids))
+		return ERR_PTR(-EINVAL);
+
 	vport->percpu_stats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
 	if (!vport->percpu_stats) {
 		kfree(vport);
@@ -161,6 +163,10 @@ struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *ops,
  */
 void ovs_vport_free(struct vport *vport)
 {
+	/* vport is freed from RCU callback or error path, Therefore
+	 * it is safe to use raw dereference.
+	 */
+	kfree(rcu_dereference_raw(vport->upcall_portids));
 	free_percpu(vport->percpu_stats);
 	kfree(vport);
 }
@@ -326,6 +332,99 @@ int ovs_vport_get_options(const struct vport *vport, struct sk_buff *skb)
 	return 0;
 }
 
+/**
+ *	ovs_vport_set_upcall_portids - set upcall portids of @vport.
+ *
+ * @vport: vport to modify.
+ * @ids: new configuration, an array of port ids.
+ *
+ * Sets the vport's upcall_portids to @ids.
+ *
+ * Returns 0 if successful, -EINVAL if @ids is zero length or cannot be parsed
+ * as an array of U32.
+ *
+ * Must be called with ovs_mutex.
+ */
+int ovs_vport_set_upcall_portids(struct vport *vport,  struct nlattr *ids)
+{
+	struct vport_portids *old, *vport_portids;
+
+	if (!nla_len(ids) || nla_len(ids) % sizeof(u32))
+		return -EINVAL;
+
+	old = ovsl_dereference(vport->upcall_portids);
+
+	vport_portids = kmalloc(sizeof(*vport_portids) + nla_len(ids),
+				GFP_KERNEL);
+	if (!vport_portids)
+		return -ENOMEM;
+
+	vport_portids->n_ids = nla_len(ids) / sizeof(u32);
+	vport_portids->rn_ids = reciprocal_value(vport_portids->n_ids);
+	nla_memcpy(vport_portids->ids, ids, nla_len(ids));
+
+	rcu_assign_pointer(vport->upcall_portids, vport_portids);
+
+	if (old)
+		kfree_rcu(old, rcu);
+	return 0;
+}
+
+/**
+ *	ovs_vport_get_upcall_portids - get the upcall_portids of @vport.
+ *
+ * @vport: vport from which to retrieve the portids.
+ * @skb: sk_buff where portids should be appended.
+ *
+ * Retrieves the configuration of the given vport, appending the
+ * %OVS_VPORT_ATTR_UPCALL_PID attribute which is the array of upcall
+ * portids to @skb.
+ *
+ * Returns 0 if successful, -EMSGSIZE if @skb has insufficient room.
+ * If an error occurs, @skb is left unmodified.  Must be called with
+ * ovs_mutex or rcu_read_lock.
+ */
+int ovs_vport_get_upcall_portids(const struct vport *vport,
+				 struct sk_buff *skb)
+{
+	struct vport_portids *ids;
+
+	ids = rcu_dereference_ovsl(vport->upcall_portids);
+
+	if (vport->dp->user_features & OVS_DP_F_VPORT_PIDS)
+		return nla_put(skb, OVS_VPORT_ATTR_UPCALL_PID,
+			       ids->n_ids * sizeof(u32), (void *)ids->ids);
+	else
+		return nla_put_u32(skb, OVS_VPORT_ATTR_UPCALL_PID, ids->ids[0]);
+}
+
+/**
+ *	ovs_vport_find_upcall_portid - find the upcall portid to send upcall.
+ *
+ * @vport: vport from which the missed packet is received.
+ * @skb: skb that the missed packet was received.
+ *
+ * Uses the skb_get_hash() to select the upcall portid to send the
+ * upcall.
+ *
+ * Returns the portid of the target socket.  Must be called with rcu_read_lock.
+ */
+u32 ovs_vport_find_upcall_portid(const struct vport *p, struct sk_buff *skb)
+{
+	struct vport_portids *ids;
+	u32 ids_index;
+	u32 hash;
+
+	ids = rcu_dereference(p->upcall_portids);
+
+	if (ids->n_ids == 1 && ids->ids[0] == 0)
+		return 0;
+
+	hash = skb_get_hash(skb);
+	ids_index = hash - ids->n_ids * reciprocal_divide(hash, ids->rn_ids);
+	return ids->ids[ids_index];
+}
+
 /**
  *	ovs_vport_receive - pass up received packet to the datapath for processing
  *
diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h
index 8d721e62f388..35f89d84b45e 100644
--- a/net/openvswitch/vport.h
+++ b/net/openvswitch/vport.h
@@ -23,6 +23,7 @@
 #include <linux/list.h>
 #include <linux/netlink.h>
 #include <linux/openvswitch.h>
+#include <linux/reciprocal_div.h>
 #include <linux/skbuff.h>
 #include <linux/spinlock.h>
 #include <linux/u64_stats_sync.h>
@@ -52,6 +53,10 @@ void ovs_vport_get_stats(struct vport *, struct ovs_vport_stats *);
 int ovs_vport_set_options(struct vport *, struct nlattr *options);
 int ovs_vport_get_options(const struct vport *, struct sk_buff *);
 
+int ovs_vport_set_upcall_portids(struct vport *, struct nlattr *pids);
+int ovs_vport_get_upcall_portids(const struct vport *, struct sk_buff *);
+u32 ovs_vport_find_upcall_portid(const struct vport *, struct sk_buff *);
+
 int ovs_vport_send(struct vport *, struct sk_buff *);
 
 /* The following definitions are for implementers of vport devices: */
@@ -62,13 +67,27 @@ struct vport_err_stats {
 	u64 tx_dropped;
 	u64 tx_errors;
 };
+/**
+ * struct vport_portids - array of netlink portids of a vport.
+ *                        must be protected by rcu.
+ * @rn_ids: The reciprocal value of @n_ids.
+ * @rcu: RCU callback head for deferred destruction.
+ * @n_ids: Size of @ids array.
+ * @ids: Array storing the Netlink socket pids to be used for packets received
+ * on this port that miss the flow table.
+ */
+struct vport_portids {
+	struct reciprocal_value rn_ids;
+	struct rcu_head rcu;
+	u32 n_ids;
+	u32 ids[];
+};
 
 /**
  * struct vport - one port within a datapath
  * @rcu: RCU callback head for deferred destruction.
  * @dp: Datapath to which this port belongs.
- * @upcall_portid: The Netlink port to use for packets received on this port that
- * miss the flow table.
+ * @upcall_portids: RCU protected 'struct vport_portids'.
  * @port_no: Index into @dp's @ports array.
  * @hash_node: Element in @dev_table hash table in vport.c.
  * @dp_hash_node: Element in @datapath->ports hash table in datapath.c.
@@ -80,7 +99,7 @@ struct vport_err_stats {
 struct vport {
 	struct rcu_head rcu;
 	struct datapath	*dp;
-	u32 upcall_portid;
+	struct vport_portids __rcu *upcall_portids;
 	u16 port_no;
 
 	struct hlist_node hash_node;
@@ -111,7 +130,7 @@ struct vport_parms {
 	/* For ovs_vport_alloc(). */
 	struct datapath *dp;
 	u16 port_no;
-	u32 upcall_portid;
+	struct nlattr *upcall_portids;
 };
 
 /**
-- 
cgit v1.2.3


From 811c508104d092683ea5fe86cdcfd23dded22934 Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hans.verkuil@cisco.com>
Date: Mon, 21 Jul 2014 10:45:37 -0300
Subject: [media] v4l2-ctrls: add new RDS TX controls

The si4713 supports several RDS features not yet implemented in the driver.

This patch adds the missing RDS functionality to the list of RDS controls.

The ALT_FREQS control is a compound control containing an array of up
to 25 (the maximum according to the RDS standard) frequencies. To support
that the V4L2_CTRL_TYPE_U32 was added.

Signed-off-by: Hans Verkuil <hans.verkuil@cisco.com>
Cc: Eduardo Valentin <edubezval@gmail.com>
Signed-off-by: Mauro Carvalho Chehab <m.chehab@samsung.com>
---
 drivers/media/v4l2-core/v4l2-ctrls.c | 35 +++++++++++++++++++++++++++++++++++
 include/media/v4l2-ctrls.h           |  2 ++
 include/uapi/linux/v4l2-controls.h   |  9 +++++++++
 include/uapi/linux/videodev2.h       |  2 ++
 4 files changed, 48 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/drivers/media/v4l2-core/v4l2-ctrls.c b/drivers/media/v4l2-core/v4l2-ctrls.c
index 5db038525f99..4863cf05175d 100644
--- a/drivers/media/v4l2-core/v4l2-ctrls.c
+++ b/drivers/media/v4l2-core/v4l2-ctrls.c
@@ -805,6 +805,15 @@ const char *v4l2_ctrl_get_name(u32 id)
 	case V4L2_CID_RDS_TX_PTY:		return "RDS Program Type";
 	case V4L2_CID_RDS_TX_PS_NAME:		return "RDS PS Name";
 	case V4L2_CID_RDS_TX_RADIO_TEXT:	return "RDS Radio Text";
+	case V4L2_CID_RDS_TX_MONO_STEREO:	return "RDS Stereo";
+	case V4L2_CID_RDS_TX_ARTIFICIAL_HEAD:	return "RDS Artificial Head";
+	case V4L2_CID_RDS_TX_COMPRESSED:	return "RDS Compressed";
+	case V4L2_CID_RDS_TX_DYNAMIC_PTY:	return "RDS Dynamic PTY";
+	case V4L2_CID_RDS_TX_TRAFFIC_ANNOUNCEMENT: return "RDS Traffic Announcement";
+	case V4L2_CID_RDS_TX_TRAFFIC_PROGRAM:	return "RDS Traffic Program";
+	case V4L2_CID_RDS_TX_MUSIC_SPEECH:	return "RDS Music";
+	case V4L2_CID_RDS_TX_ALT_FREQS_ENABLE:	return "RDS Enable Alt Frequencies";
+	case V4L2_CID_RDS_TX_ALT_FREQS:		return "RDS Alternate Frequencies";
 	case V4L2_CID_AUDIO_LIMITER_ENABLED:	return "Audio Limiter Feature Enabled";
 	case V4L2_CID_AUDIO_LIMITER_RELEASE_TIME: return "Audio Limiter Release Time";
 	case V4L2_CID_AUDIO_LIMITER_DEVIATION:	return "Audio Limiter Deviation";
@@ -946,6 +955,14 @@ void v4l2_ctrl_fill(u32 id, const char **name, enum v4l2_ctrl_type *type,
 	case V4L2_CID_RF_TUNER_IF_GAIN_AUTO:
 	case V4L2_CID_RF_TUNER_BANDWIDTH_AUTO:
 	case V4L2_CID_RF_TUNER_PLL_LOCK:
+	case V4L2_CID_RDS_TX_MONO_STEREO:
+	case V4L2_CID_RDS_TX_ARTIFICIAL_HEAD:
+	case V4L2_CID_RDS_TX_COMPRESSED:
+	case V4L2_CID_RDS_TX_DYNAMIC_PTY:
+	case V4L2_CID_RDS_TX_TRAFFIC_ANNOUNCEMENT:
+	case V4L2_CID_RDS_TX_TRAFFIC_PROGRAM:
+	case V4L2_CID_RDS_TX_MUSIC_SPEECH:
+	case V4L2_CID_RDS_TX_ALT_FREQS_ENABLE:
 		*type = V4L2_CTRL_TYPE_BOOLEAN;
 		*min = 0;
 		*max = *step = 1;
@@ -1089,6 +1106,9 @@ void v4l2_ctrl_fill(u32 id, const char **name, enum v4l2_ctrl_type *type,
 	case V4L2_CID_DETECT_MD_THRESHOLD_GRID:
 		*type = V4L2_CTRL_TYPE_U16;
 		break;
+	case V4L2_CID_RDS_TX_ALT_FREQS:
+		*type = V4L2_CTRL_TYPE_U32;
+		break;
 	default:
 		*type = V4L2_CTRL_TYPE_INTEGER;
 		break;
@@ -1209,6 +1229,8 @@ static bool std_equal(const struct v4l2_ctrl *ctrl, u32 idx,
 		return ptr1.p_u8[idx] == ptr2.p_u8[idx];
 	case V4L2_CTRL_TYPE_U16:
 		return ptr1.p_u16[idx] == ptr2.p_u16[idx];
+	case V4L2_CTRL_TYPE_U32:
+		return ptr1.p_u32[idx] == ptr2.p_u32[idx];
 	default:
 		if (ctrl->is_int)
 			return ptr1.p_s32[idx] == ptr2.p_s32[idx];
@@ -1242,6 +1264,9 @@ static void std_init(const struct v4l2_ctrl *ctrl, u32 idx,
 	case V4L2_CTRL_TYPE_U16:
 		ptr.p_u16[idx] = ctrl->default_value;
 		break;
+	case V4L2_CTRL_TYPE_U32:
+		ptr.p_u32[idx] = ctrl->default_value;
+		break;
 	default:
 		idx *= ctrl->elem_size;
 		memset(ptr.p + idx, 0, ctrl->elem_size);
@@ -1289,6 +1314,9 @@ static void std_log(const struct v4l2_ctrl *ctrl)
 	case V4L2_CTRL_TYPE_U16:
 		pr_cont("%u", (unsigned)*ptr.p_u16);
 		break;
+	case V4L2_CTRL_TYPE_U32:
+		pr_cont("%u", (unsigned)*ptr.p_u32);
+		break;
 	default:
 		pr_cont("unknown type %d", ctrl->type);
 		break;
@@ -1346,6 +1374,8 @@ static int std_validate(const struct v4l2_ctrl *ctrl, u32 idx,
 		return ROUND_TO_RANGE(ptr.p_u8[idx], u8, ctrl);
 	case V4L2_CTRL_TYPE_U16:
 		return ROUND_TO_RANGE(ptr.p_u16[idx], u16, ctrl);
+	case V4L2_CTRL_TYPE_U32:
+		return ROUND_TO_RANGE(ptr.p_u32[idx], u32, ctrl);
 
 	case V4L2_CTRL_TYPE_BOOLEAN:
 		ptr.p_s32[idx] = !!ptr.p_s32[idx];
@@ -1578,6 +1608,7 @@ static int check_range(enum v4l2_ctrl_type type,
 		/* fall through */
 	case V4L2_CTRL_TYPE_U8:
 	case V4L2_CTRL_TYPE_U16:
+	case V4L2_CTRL_TYPE_U32:
 	case V4L2_CTRL_TYPE_INTEGER:
 	case V4L2_CTRL_TYPE_INTEGER64:
 		if (step == 0 || min > max || def < min || def > max)
@@ -1893,6 +1924,9 @@ static struct v4l2_ctrl *v4l2_ctrl_new(struct v4l2_ctrl_handler *hdl,
 	case V4L2_CTRL_TYPE_U16:
 		elem_size = sizeof(u16);
 		break;
+	case V4L2_CTRL_TYPE_U32:
+		elem_size = sizeof(u32);
+		break;
 	default:
 		if (type < V4L2_CTRL_COMPOUND_TYPES)
 			elem_size = sizeof(s32);
@@ -3248,6 +3282,7 @@ int __v4l2_ctrl_modify_range(struct v4l2_ctrl *ctrl,
 	case V4L2_CTRL_TYPE_BITMASK:
 	case V4L2_CTRL_TYPE_U8:
 	case V4L2_CTRL_TYPE_U16:
+	case V4L2_CTRL_TYPE_U32:
 		if (ctrl->is_array)
 			return -EINVAL;
 		ret = check_range(ctrl->type, min, max, step, def);
diff --git a/include/media/v4l2-ctrls.h b/include/media/v4l2-ctrls.h
index d6540d201764..b7cd7a665e35 100644
--- a/include/media/v4l2-ctrls.h
+++ b/include/media/v4l2-ctrls.h
@@ -41,6 +41,7 @@ struct poll_table_struct;
  * @p_s64:	Pointer to a 64-bit signed value.
  * @p_u8:	Pointer to a 8-bit unsigned value.
  * @p_u16:	Pointer to a 16-bit unsigned value.
+ * @p_u32:	Pointer to a 32-bit unsigned value.
  * @p_char:	Pointer to a string.
  * @p:		Pointer to a compound value.
  */
@@ -49,6 +50,7 @@ union v4l2_ctrl_ptr {
 	s64 *p_s64;
 	u8 *p_u8;
 	u16 *p_u16;
+	u32 *p_u32;
 	char *p_char;
 	void *p;
 };
diff --git a/include/uapi/linux/v4l2-controls.h b/include/uapi/linux/v4l2-controls.h
index db526d1c8309..a13e6fef987e 100644
--- a/include/uapi/linux/v4l2-controls.h
+++ b/include/uapi/linux/v4l2-controls.h
@@ -757,6 +757,15 @@ enum v4l2_auto_focus_range {
 #define V4L2_CID_RDS_TX_PTY			(V4L2_CID_FM_TX_CLASS_BASE + 3)
 #define V4L2_CID_RDS_TX_PS_NAME			(V4L2_CID_FM_TX_CLASS_BASE + 5)
 #define V4L2_CID_RDS_TX_RADIO_TEXT		(V4L2_CID_FM_TX_CLASS_BASE + 6)
+#define V4L2_CID_RDS_TX_MONO_STEREO		(V4L2_CID_FM_TX_CLASS_BASE + 7)
+#define V4L2_CID_RDS_TX_ARTIFICIAL_HEAD		(V4L2_CID_FM_TX_CLASS_BASE + 8)
+#define V4L2_CID_RDS_TX_COMPRESSED		(V4L2_CID_FM_TX_CLASS_BASE + 9)
+#define V4L2_CID_RDS_TX_DYNAMIC_PTY		(V4L2_CID_FM_TX_CLASS_BASE + 10)
+#define V4L2_CID_RDS_TX_TRAFFIC_ANNOUNCEMENT	(V4L2_CID_FM_TX_CLASS_BASE + 11)
+#define V4L2_CID_RDS_TX_TRAFFIC_PROGRAM		(V4L2_CID_FM_TX_CLASS_BASE + 12)
+#define V4L2_CID_RDS_TX_MUSIC_SPEECH		(V4L2_CID_FM_TX_CLASS_BASE + 13)
+#define V4L2_CID_RDS_TX_ALT_FREQS_ENABLE	(V4L2_CID_FM_TX_CLASS_BASE + 14)
+#define V4L2_CID_RDS_TX_ALT_FREQS		(V4L2_CID_FM_TX_CLASS_BASE + 15)
 
 #define V4L2_CID_AUDIO_LIMITER_ENABLED		(V4L2_CID_FM_TX_CLASS_BASE + 64)
 #define V4L2_CID_AUDIO_LIMITER_RELEASE_TIME	(V4L2_CID_FM_TX_CLASS_BASE + 65)
diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h
index 5fd42027877a..778a3298fb34 100644
--- a/include/uapi/linux/videodev2.h
+++ b/include/uapi/linux/videodev2.h
@@ -1288,6 +1288,7 @@ struct v4l2_ext_control {
 		char *string;
 		__u8 *p_u8;
 		__u16 *p_u16;
+		__u32 *p_u32;
 		void *ptr;
 	};
 } __attribute__ ((packed));
@@ -1320,6 +1321,7 @@ enum v4l2_ctrl_type {
 	V4L2_CTRL_COMPOUND_TYPES     = 0x0100,
 	V4L2_CTRL_TYPE_U8	     = 0x0100,
 	V4L2_CTRL_TYPE_U16	     = 0x0101,
+	V4L2_CTRL_TYPE_U32	     = 0x0102,
 };
 
 /*  Used in the VIDIOC_QUERYCTRL ioctl for querying controls */
-- 
cgit v1.2.3


From 9570a14847a9d334a0baf5a5921da2dbc6b9f6d2 Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hans.verkuil@cisco.com>
Date: Mon, 21 Jul 2014 10:45:40 -0300
Subject: [media] v4l2-ctrls: add RX RDS controls

The radio-miropcm20 driver has firmware that decodes the RDS signals. So in that
case the RDS data becomes available in the form of controls.

Add support for these controls to the control framework, allowing the miro driver
to use them.

Signed-off-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <m.chehab@samsung.com>
---
 drivers/media/v4l2-core/v4l2-ctrls.c | 19 +++++++++++++++++--
 include/uapi/linux/v4l2-controls.h   |  6 ++++++
 2 files changed, 23 insertions(+), 2 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/drivers/media/v4l2-core/v4l2-ctrls.c b/drivers/media/v4l2-core/v4l2-ctrls.c
index 4863cf05175d..2d8ced8ad80b 100644
--- a/drivers/media/v4l2-core/v4l2-ctrls.c
+++ b/drivers/media/v4l2-core/v4l2-ctrls.c
@@ -881,7 +881,6 @@ const char *v4l2_ctrl_get_name(u32 id)
 	case V4L2_CID_FM_RX_CLASS:		return "FM Radio Receiver Controls";
 	case V4L2_CID_TUNE_DEEMPHASIS:		return "De-Emphasis";
 	case V4L2_CID_RDS_RECEPTION:		return "RDS Reception";
-
 	case V4L2_CID_RF_TUNER_CLASS:		return "RF Tuner Controls";
 	case V4L2_CID_RF_TUNER_LNA_GAIN_AUTO:	return "LNA Gain, Auto";
 	case V4L2_CID_RF_TUNER_LNA_GAIN:	return "LNA Gain";
@@ -892,6 +891,12 @@ const char *v4l2_ctrl_get_name(u32 id)
 	case V4L2_CID_RF_TUNER_BANDWIDTH_AUTO:	return "Bandwidth, Auto";
 	case V4L2_CID_RF_TUNER_BANDWIDTH:	return "Bandwidth";
 	case V4L2_CID_RF_TUNER_PLL_LOCK:	return "PLL Lock";
+	case V4L2_CID_RDS_RX_PTY:		return "RDS Program Type";
+	case V4L2_CID_RDS_RX_PS_NAME:		return "RDS PS Name";
+	case V4L2_CID_RDS_RX_RADIO_TEXT:	return "RDS Radio Text";
+	case V4L2_CID_RDS_RX_TRAFFIC_ANNOUNCEMENT: return "RDS Traffic Announcement";
+	case V4L2_CID_RDS_RX_TRAFFIC_PROGRAM:	return "RDS Traffic Program";
+	case V4L2_CID_RDS_RX_MUSIC_SPEECH:	return "RDS Music";
 
 	/* Detection controls */
 	/* Keep the order of the 'case's the same as in v4l2-controls.h! */
@@ -900,7 +905,6 @@ const char *v4l2_ctrl_get_name(u32 id)
 	case V4L2_CID_DETECT_MD_GLOBAL_THRESHOLD: return "MD Global Threshold";
 	case V4L2_CID_DETECT_MD_THRESHOLD_GRID:	return "MD Threshold Grid";
 	case V4L2_CID_DETECT_MD_REGION_GRID:	return "MD Region Grid";
-
 	default:
 		return NULL;
 	}
@@ -963,6 +967,9 @@ void v4l2_ctrl_fill(u32 id, const char **name, enum v4l2_ctrl_type *type,
 	case V4L2_CID_RDS_TX_TRAFFIC_PROGRAM:
 	case V4L2_CID_RDS_TX_MUSIC_SPEECH:
 	case V4L2_CID_RDS_TX_ALT_FREQS_ENABLE:
+	case V4L2_CID_RDS_RX_TRAFFIC_ANNOUNCEMENT:
+	case V4L2_CID_RDS_RX_TRAFFIC_PROGRAM:
+	case V4L2_CID_RDS_RX_MUSIC_SPEECH:
 		*type = V4L2_CTRL_TYPE_BOOLEAN;
 		*min = 0;
 		*max = *step = 1;
@@ -1035,6 +1042,8 @@ void v4l2_ctrl_fill(u32 id, const char **name, enum v4l2_ctrl_type *type,
 		break;
 	case V4L2_CID_RDS_TX_PS_NAME:
 	case V4L2_CID_RDS_TX_RADIO_TEXT:
+	case V4L2_CID_RDS_RX_PS_NAME:
+	case V4L2_CID_RDS_RX_RADIO_TEXT:
 		*type = V4L2_CTRL_TYPE_STRING;
 		break;
 	case V4L2_CID_ISO_SENSITIVITY:
@@ -1166,6 +1175,12 @@ void v4l2_ctrl_fill(u32 id, const char **name, enum v4l2_ctrl_type *type,
 	case V4L2_CID_DV_TX_RXSENSE:
 	case V4L2_CID_DV_TX_EDID_PRESENT:
 	case V4L2_CID_DV_RX_POWER_PRESENT:
+	case V4L2_CID_RDS_RX_PTY:
+	case V4L2_CID_RDS_RX_PS_NAME:
+	case V4L2_CID_RDS_RX_RADIO_TEXT:
+	case V4L2_CID_RDS_RX_TRAFFIC_ANNOUNCEMENT:
+	case V4L2_CID_RDS_RX_TRAFFIC_PROGRAM:
+	case V4L2_CID_RDS_RX_MUSIC_SPEECH:
 		*flags |= V4L2_CTRL_FLAG_READ_ONLY;
 		break;
 	case V4L2_CID_RF_TUNER_PLL_LOCK:
diff --git a/include/uapi/linux/v4l2-controls.h b/include/uapi/linux/v4l2-controls.h
index a13e6fef987e..e946e43fb8d5 100644
--- a/include/uapi/linux/v4l2-controls.h
+++ b/include/uapi/linux/v4l2-controls.h
@@ -910,6 +910,12 @@ enum v4l2_deemphasis {
 };
 
 #define V4L2_CID_RDS_RECEPTION			(V4L2_CID_FM_RX_CLASS_BASE + 2)
+#define V4L2_CID_RDS_RX_PTY			(V4L2_CID_FM_RX_CLASS_BASE + 3)
+#define V4L2_CID_RDS_RX_PS_NAME			(V4L2_CID_FM_RX_CLASS_BASE + 4)
+#define V4L2_CID_RDS_RX_RADIO_TEXT		(V4L2_CID_FM_RX_CLASS_BASE + 5)
+#define V4L2_CID_RDS_RX_TRAFFIC_ANNOUNCEMENT	(V4L2_CID_FM_RX_CLASS_BASE + 6)
+#define V4L2_CID_RDS_RX_TRAFFIC_PROGRAM		(V4L2_CID_FM_RX_CLASS_BASE + 7)
+#define V4L2_CID_RDS_RX_MUSIC_SPEECH		(V4L2_CID_FM_RX_CLASS_BASE + 8)
 
 #define V4L2_CID_RF_TUNER_CLASS_BASE		(V4L2_CTRL_CLASS_RF_TUNER | 0x900)
 #define V4L2_CID_RF_TUNER_CLASS			(V4L2_CTRL_CLASS_RF_TUNER | 1)
-- 
cgit v1.2.3


From 699a0ea0823d32030b0666b28ff8633960f7ffa7 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Mon, 2 Jun 2014 11:02:59 +1000
Subject: KVM: PPC: Book3S: Controls for in-kernel sPAPR hypercall handling

This provides a way for userspace controls which sPAPR hcalls get
handled in the kernel.  Each hcall can be individually enabled or
disabled for in-kernel handling, except for H_RTAS.  The exception
for H_RTAS is because userspace can already control whether
individual RTAS functions are handled in-kernel or not via the
KVM_PPC_RTAS_DEFINE_TOKEN ioctl, and because the numeric value for
H_RTAS is out of the normal sequence of hcall numbers.

Hcalls are enabled or disabled using the KVM_ENABLE_CAP ioctl for the
KVM_CAP_PPC_ENABLE_HCALL capability on the file descriptor for the VM.
The args field of the struct kvm_enable_cap specifies the hcall number
in args[0] and the enable/disable flag in args[1]; 0 means disable
in-kernel handling (so that the hcall will always cause an exit to
userspace) and 1 means enable.  Enabling or disabling in-kernel
handling of an hcall is effective across the whole VM.

The ability for KVM_ENABLE_CAP to be used on a VM file descriptor
on PowerPC is new, added by this commit.  The KVM_CAP_ENABLE_CAP_VM
capability advertises that this ability exists.

When a VM is created, an initial set of hcalls are enabled for
in-kernel handling.  The set that is enabled is the set that have
an in-kernel implementation at this point.  Any new hcall
implementations from this point onwards should not be added to the
default set without a good reason.

No distinction is made between real-mode and virtual-mode hcall
implementations; the one setting controls them both.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 Documentation/virtual/kvm/api.txt       | 41 ++++++++++++++++++++++++--
 arch/powerpc/include/asm/kvm_book3s.h   |  1 +
 arch/powerpc/include/asm/kvm_host.h     |  2 ++
 arch/powerpc/kernel/asm-offsets.c       |  1 +
 arch/powerpc/kvm/book3s_hv.c            | 51 +++++++++++++++++++++++++++++++++
 arch/powerpc/kvm/book3s_hv_rmhandlers.S | 11 +++++++
 arch/powerpc/kvm/book3s_pr.c            |  5 ++++
 arch/powerpc/kvm/book3s_pr_papr.c       | 37 ++++++++++++++++++++++++
 arch/powerpc/kvm/powerpc.c              | 45 +++++++++++++++++++++++++++++
 include/uapi/linux/kvm.h                |  1 +
 10 files changed, 193 insertions(+), 2 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 0fe36497642c..5c54d196f4c8 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -2863,8 +2863,8 @@ The fields in each entry are defined as follows:
          this function/index combination
 
 
-6. Capabilities that can be enabled
------------------------------------
+6. Capabilities that can be enabled on vCPUs
+--------------------------------------------
 
 There are certain capabilities that change the behavior of the virtual CPU when
 enabled. To enable them, please see section 4.37. Below you can find a list of
@@ -3002,3 +3002,40 @@ Parameters: args[0] is the XICS device fd
             args[1] is the XICS CPU number (server ID) for this vcpu
 
 This capability connects the vcpu to an in-kernel XICS device.
+
+
+7. Capabilities that can be enabled on VMs
+------------------------------------------
+
+There are certain capabilities that change the behavior of the virtual
+machine when enabled. To enable them, please see section 4.37. Below
+you can find a list of capabilities and what their effect on the VM
+is when enabling them.
+
+The following information is provided along with the description:
+
+  Architectures: which instruction set architectures provide this ioctl.
+      x86 includes both i386 and x86_64.
+
+  Parameters: what parameters are accepted by the capability.
+
+  Returns: the return value.  General error numbers (EBADF, ENOMEM, EINVAL)
+      are not detailed, but errors with specific meanings are.
+
+
+7.1 KVM_CAP_PPC_ENABLE_HCALL
+
+Architectures: ppc
+Parameters: args[0] is the sPAPR hcall number
+	    args[1] is 0 to disable, 1 to enable in-kernel handling
+
+This capability controls whether individual sPAPR hypercalls (hcalls)
+get handled by the kernel or not.  Enabling or disabling in-kernel
+handling of an hcall is effective across the VM.  On creation, an
+initial set of hcalls are enabled for in-kernel handling, which
+consists of those hcalls for which in-kernel handlers were implemented
+before this capability was implemented.  If disabled, the kernel will
+not to attempt to handle the hcall, but will always exit to userspace
+to handle it.  Note that it may not make sense to enable some and
+disable others of a group of related hcalls, but KVM does not prevent
+userspace from doing that.
diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index a20cc0bbd048..052ab2ad49b5 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -187,6 +187,7 @@ extern void kvmppc_hv_entry_trampoline(void);
 extern u32 kvmppc_alignment_dsisr(struct kvm_vcpu *vcpu, unsigned int inst);
 extern ulong kvmppc_alignment_dar(struct kvm_vcpu *vcpu, unsigned int inst);
 extern int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd);
+extern void kvmppc_pr_init_default_hcalls(struct kvm *kvm);
 extern void kvmppc_copy_to_svcpu(struct kvmppc_book3s_shadow_vcpu *svcpu,
 				 struct kvm_vcpu *vcpu);
 extern void kvmppc_copy_from_svcpu(struct kvm_vcpu *vcpu,
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index f9ae69682ce1..62b2cee450a5 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -34,6 +34,7 @@
 #include <asm/processor.h>
 #include <asm/page.h>
 #include <asm/cacheflush.h>
+#include <asm/hvcall.h>
 
 #define KVM_MAX_VCPUS		NR_CPUS
 #define KVM_MAX_VCORES		NR_CPUS
@@ -263,6 +264,7 @@ struct kvm_arch {
 #ifdef CONFIG_PPC_BOOK3S_64
 	struct list_head spapr_tce_tables;
 	struct list_head rtas_tokens;
+	DECLARE_BITMAP(enabled_hcalls, MAX_HCALL_OPCODE/4 + 1);
 #endif
 #ifdef CONFIG_KVM_MPIC
 	struct openpic *mpic;
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index f5995a912213..17ffcb4f27f9 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -493,6 +493,7 @@ int main(void)
 	DEFINE(KVM_HOST_SDR1, offsetof(struct kvm, arch.host_sdr1));
 	DEFINE(KVM_TLBIE_LOCK, offsetof(struct kvm, arch.tlbie_lock));
 	DEFINE(KVM_NEED_FLUSH, offsetof(struct kvm, arch.need_tlb_flush.bits));
+	DEFINE(KVM_ENABLED_HCALLS, offsetof(struct kvm, arch.enabled_hcalls));
 	DEFINE(KVM_LPCR, offsetof(struct kvm, arch.lpcr));
 	DEFINE(KVM_RMOR, offsetof(struct kvm, arch.rmor));
 	DEFINE(KVM_VRMA_SLB_V, offsetof(struct kvm, arch.vrma_slb_v));
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 1562acfa05bf..cf445d22570f 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -67,6 +67,8 @@
 /* Used as a "null" value for timebase values */
 #define TB_NIL	(~(u64)0)
 
+static DECLARE_BITMAP(default_enabled_hcalls, MAX_HCALL_OPCODE/4 + 1);
+
 static void kvmppc_end_cede(struct kvm_vcpu *vcpu);
 static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu);
 
@@ -562,6 +564,10 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
 	struct kvm_vcpu *tvcpu;
 	int idx, rc;
 
+	if (req <= MAX_HCALL_OPCODE &&
+	    !test_bit(req/4, vcpu->kvm->arch.enabled_hcalls))
+		return RESUME_HOST;
+
 	switch (req) {
 	case H_ENTER:
 		idx = srcu_read_lock(&vcpu->kvm->srcu);
@@ -2269,6 +2275,10 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm)
 	 */
 	cpumask_setall(&kvm->arch.need_tlb_flush);
 
+	/* Start out with the default set of hcalls enabled */
+	memcpy(kvm->arch.enabled_hcalls, default_enabled_hcalls,
+	       sizeof(kvm->arch.enabled_hcalls));
+
 	kvm->arch.rma = NULL;
 
 	kvm->arch.host_sdr1 = mfspr(SPRN_SDR1);
@@ -2407,6 +2417,45 @@ static long kvm_arch_vm_ioctl_hv(struct file *filp,
 	return r;
 }
 
+/*
+ * List of hcall numbers to enable by default.
+ * For compatibility with old userspace, we enable by default
+ * all hcalls that were implemented before the hcall-enabling
+ * facility was added.  Note this list should not include H_RTAS.
+ */
+static unsigned int default_hcall_list[] = {
+	H_REMOVE,
+	H_ENTER,
+	H_READ,
+	H_PROTECT,
+	H_BULK_REMOVE,
+	H_GET_TCE,
+	H_PUT_TCE,
+	H_SET_DABR,
+	H_SET_XDABR,
+	H_CEDE,
+	H_PROD,
+	H_CONFER,
+	H_REGISTER_VPA,
+#ifdef CONFIG_KVM_XICS
+	H_EOI,
+	H_CPPR,
+	H_IPI,
+	H_IPOLL,
+	H_XIRR,
+	H_XIRR_X,
+#endif
+	0
+};
+
+static void init_default_hcalls(void)
+{
+	int i;
+
+	for (i = 0; default_hcall_list[i]; ++i)
+		__set_bit(default_hcall_list[i] / 4, default_enabled_hcalls);
+}
+
 static struct kvmppc_ops kvm_ops_hv = {
 	.get_sregs = kvm_arch_vcpu_ioctl_get_sregs_hv,
 	.set_sregs = kvm_arch_vcpu_ioctl_set_sregs_hv,
@@ -2454,6 +2503,8 @@ static int kvmppc_book3s_init_hv(void)
 	kvm_ops_hv.owner = THIS_MODULE;
 	kvmppc_hv_ops = &kvm_ops_hv;
 
+	init_default_hcalls();
+
 	r = kvmppc_mmu_hv_init();
 	return r;
 }
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 64ac56f6c3fe..33aaadef7139 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -1909,6 +1909,17 @@ hcall_try_real_mode:
 	clrrdi	r3,r3,2
 	cmpldi	r3,hcall_real_table_end - hcall_real_table
 	bge	guest_exit_cont
+	/* See if this hcall is enabled for in-kernel handling */
+	ld	r4, VCPU_KVM(r9)
+	srdi	r0, r3, 8	/* r0 = (r3 / 4) >> 6 */
+	sldi	r0, r0, 3	/* index into kvm->arch.enabled_hcalls[] */
+	add	r4, r4, r0
+	ld	r0, KVM_ENABLED_HCALLS(r4)
+	rlwinm	r4, r3, 32-2, 0x3f	/* r4 = (r3 / 4) & 0x3f */
+	srd	r0, r0, r4
+	andi.	r0, r0, 1
+	beq	guest_exit_cont
+	/* Get pointer to handler, if any, and call it */
 	LOAD_REG_ADDR(r4, hcall_real_table)
 	lwax	r3,r3,r4
 	cmpwi	r3,0
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index 3b82e8616dfa..123ac7dc5e1f 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -1597,6 +1597,11 @@ static int kvmppc_core_init_vm_pr(struct kvm *kvm)
 {
 	mutex_init(&kvm->arch.hpt_mutex);
 
+#ifdef CONFIG_PPC_BOOK3S_64
+	/* Start out with the default set of hcalls enabled */
+	kvmppc_pr_init_default_hcalls(kvm);
+#endif
+
 	if (firmware_has_feature(FW_FEATURE_SET_MODE)) {
 		spin_lock(&kvm_global_user_count_lock);
 		if (++kvm_global_user_count == 1)
diff --git a/arch/powerpc/kvm/book3s_pr_papr.c b/arch/powerpc/kvm/book3s_pr_papr.c
index f7c25c625a5b..eacaa6e4876e 100644
--- a/arch/powerpc/kvm/book3s_pr_papr.c
+++ b/arch/powerpc/kvm/book3s_pr_papr.c
@@ -267,6 +267,10 @@ static int kvmppc_h_pr_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd)
 
 int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd)
 {
+	if (cmd <= MAX_HCALL_OPCODE &&
+	    !test_bit(cmd/4, vcpu->kvm->arch.enabled_hcalls))
+		return EMULATE_FAIL;
+
 	switch (cmd) {
 	case H_ENTER:
 		return kvmppc_h_pr_enter(vcpu);
@@ -304,3 +308,36 @@ int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd)
 
 	return EMULATE_FAIL;
 }
+
+
+/*
+ * List of hcall numbers to enable by default.
+ * For compatibility with old userspace, we enable by default
+ * all hcalls that were implemented before the hcall-enabling
+ * facility was added.  Note this list should not include H_RTAS.
+ */
+static unsigned int default_hcall_list[] = {
+	H_ENTER,
+	H_REMOVE,
+	H_PROTECT,
+	H_BULK_REMOVE,
+	H_PUT_TCE,
+	H_CEDE,
+#ifdef CONFIG_KVM_XICS
+	H_XIRR,
+	H_CPPR,
+	H_EOI,
+	H_IPI,
+	H_IPOLL,
+	H_XIRR_X,
+#endif
+	0
+};
+
+void kvmppc_pr_init_default_hcalls(struct kvm *kvm)
+{
+	int i;
+
+	for (i = 0; default_hcall_list[i]; ++i)
+		__set_bit(default_hcall_list[i] / 4, kvm->arch.enabled_hcalls);
+}
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 61c738ab1283..3222a4d08a6f 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -387,6 +387,7 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_PPC_UNSET_IRQ:
 	case KVM_CAP_PPC_IRQ_LEVEL:
 	case KVM_CAP_ENABLE_CAP:
+	case KVM_CAP_ENABLE_CAP_VM:
 	case KVM_CAP_ONE_REG:
 	case KVM_CAP_IOEVENTFD:
 	case KVM_CAP_DEVICE_CTRL:
@@ -417,6 +418,7 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_PPC_ALLOC_HTAB:
 	case KVM_CAP_PPC_RTAS:
 	case KVM_CAP_PPC_FIXUP_HCALL:
+	case KVM_CAP_PPC_ENABLE_HCALL:
 #ifdef CONFIG_KVM_XICS
 	case KVM_CAP_IRQ_XICS:
 #endif
@@ -1099,6 +1101,40 @@ int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event,
 	return 0;
 }
 
+
+static int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
+				   struct kvm_enable_cap *cap)
+{
+	int r;
+
+	if (cap->flags)
+		return -EINVAL;
+
+	switch (cap->cap) {
+#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
+	case KVM_CAP_PPC_ENABLE_HCALL: {
+		unsigned long hcall = cap->args[0];
+
+		r = -EINVAL;
+		if (hcall > MAX_HCALL_OPCODE || (hcall & 3) ||
+		    cap->args[1] > 1)
+			break;
+		if (cap->args[1])
+			set_bit(hcall / 4, kvm->arch.enabled_hcalls);
+		else
+			clear_bit(hcall / 4, kvm->arch.enabled_hcalls);
+		r = 0;
+		break;
+	}
+#endif
+	default:
+		r = -EINVAL;
+		break;
+	}
+
+	return r;
+}
+
 long kvm_arch_vm_ioctl(struct file *filp,
                        unsigned int ioctl, unsigned long arg)
 {
@@ -1118,6 +1154,15 @@ long kvm_arch_vm_ioctl(struct file *filp,
 
 		break;
 	}
+	case KVM_ENABLE_CAP:
+	{
+		struct kvm_enable_cap cap;
+		r = -EFAULT;
+		if (copy_from_user(&cap, argp, sizeof(cap)))
+			goto out;
+		r = kvm_vm_ioctl_enable_cap(kvm, &cap);
+		break;
+	}
 #ifdef CONFIG_PPC_BOOK3S_64
 	case KVM_CREATE_SPAPR_TCE: {
 		struct kvm_create_spapr_tce create_tce;
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index e11d8f170a62..0418b746cb68 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -758,6 +758,7 @@ struct kvm_ppc_smmu_info {
 #define KVM_CAP_VM_ATTRIBUTES 101
 #define KVM_CAP_ARM_PSCI_0_2 102
 #define KVM_CAP_PPC_FIXUP_HCALL 103
+#define KVM_CAP_PPC_ENABLE_HCALL 104
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
-- 
cgit v1.2.3


From 92b591a4c46b103ebd3fc0d03a084e1efd331253 Mon Sep 17 00:00:00 2001
From: Alexander Graf <agraf@suse.de>
Date: Mon, 14 Jul 2014 18:33:08 +0200
Subject: KVM: Allow KVM_CHECK_EXTENSION on the vm fd

The KVM_CHECK_EXTENSION is only available on the kvm fd today. Unfortunately
on PPC some of the capabilities change depending on the way a VM was created.

So instead we need a way to expose capabilities as VM ioctl, so that we can
see which VM type we're using (HV or PR). To enable this, add the
KVM_CHECK_EXTENSION ioctl to our vm ioctl portfolio.

Signed-off-by: Alexander Graf <agraf@suse.de>
Acked-by: Paolo Bonzini <pbonzini@redhat.com>
---
 Documentation/virtual/kvm/api.txt |  7 +++--
 include/uapi/linux/kvm.h          |  1 +
 virt/kvm/kvm_main.c               | 58 +++++++++++++++++++++------------------
 3 files changed, 37 insertions(+), 29 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 884f819e1908..8898caf8c090 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -148,9 +148,9 @@ of banks, as set via the KVM_X86_SETUP_MCE ioctl.
 
 4.4 KVM_CHECK_EXTENSION
 
-Capability: basic
+Capability: basic, KVM_CAP_CHECK_EXTENSION_VM for vm ioctl
 Architectures: all
-Type: system ioctl
+Type: system ioctl, vm ioctl
 Parameters: extension identifier (KVM_CAP_*)
 Returns: 0 if unsupported; 1 (or some other positive integer) if supported
 
@@ -160,6 +160,9 @@ receives an integer that describes the extension availability.
 Generally 0 means no and 1 means yes, but some extensions may report
 additional information in the integer return value.
 
+Based on their initialization different VMs may have different capabilities.
+It is thus encouraged to use the vm ioctl to query for capabilities (available
+with KVM_CAP_CHECK_EXTENSION_VM on the vm fd)
 
 4.5 KVM_GET_VCPU_MMAP_SIZE
 
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 0418b746cb68..51776cac6a9b 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -759,6 +759,7 @@ struct kvm_ppc_smmu_info {
 #define KVM_CAP_ARM_PSCI_0_2 102
 #define KVM_CAP_PPC_FIXUP_HCALL 103
 #define KVM_CAP_PPC_ENABLE_HCALL 104
+#define KVM_CAP_CHECK_EXTENSION_VM 105
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index e28f3caa539d..1b95cc926cfc 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2324,6 +2324,34 @@ static int kvm_ioctl_create_device(struct kvm *kvm,
 	return 0;
 }
 
+static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg)
+{
+	switch (arg) {
+	case KVM_CAP_USER_MEMORY:
+	case KVM_CAP_DESTROY_MEMORY_REGION_WORKS:
+	case KVM_CAP_JOIN_MEMORY_REGIONS_WORKS:
+#ifdef CONFIG_KVM_APIC_ARCHITECTURE
+	case KVM_CAP_SET_BOOT_CPU_ID:
+#endif
+	case KVM_CAP_INTERNAL_ERROR_DATA:
+#ifdef CONFIG_HAVE_KVM_MSI
+	case KVM_CAP_SIGNAL_MSI:
+#endif
+#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
+	case KVM_CAP_IRQFD_RESAMPLE:
+#endif
+	case KVM_CAP_CHECK_EXTENSION_VM:
+		return 1;
+#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
+	case KVM_CAP_IRQ_ROUTING:
+		return KVM_MAX_IRQ_ROUTES;
+#endif
+	default:
+		break;
+	}
+	return kvm_vm_ioctl_check_extension(kvm, arg);
+}
+
 static long kvm_vm_ioctl(struct file *filp,
 			   unsigned int ioctl, unsigned long arg)
 {
@@ -2487,6 +2515,9 @@ static long kvm_vm_ioctl(struct file *filp,
 		r = 0;
 		break;
 	}
+	case KVM_CHECK_EXTENSION:
+		r = kvm_vm_ioctl_check_extension_generic(kvm, arg);
+		break;
 	default:
 		r = kvm_arch_vm_ioctl(filp, ioctl, arg);
 		if (r == -ENOTTY)
@@ -2571,33 +2602,6 @@ static int kvm_dev_ioctl_create_vm(unsigned long type)
 	return r;
 }
 
-static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg)
-{
-	switch (arg) {
-	case KVM_CAP_USER_MEMORY:
-	case KVM_CAP_DESTROY_MEMORY_REGION_WORKS:
-	case KVM_CAP_JOIN_MEMORY_REGIONS_WORKS:
-#ifdef CONFIG_KVM_APIC_ARCHITECTURE
-	case KVM_CAP_SET_BOOT_CPU_ID:
-#endif
-	case KVM_CAP_INTERNAL_ERROR_DATA:
-#ifdef CONFIG_HAVE_KVM_MSI
-	case KVM_CAP_SIGNAL_MSI:
-#endif
-#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
-	case KVM_CAP_IRQFD_RESAMPLE:
-#endif
-		return 1;
-#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
-	case KVM_CAP_IRQ_ROUTING:
-		return KVM_MAX_IRQ_ROUTES;
-#endif
-	default:
-		break;
-	}
-	return kvm_vm_ioctl_check_extension(kvm, arg);
-}
-
 static long kvm_dev_ioctl(struct file *filp,
 			  unsigned int ioctl, unsigned long arg)
 {
-- 
cgit v1.2.3


From ce91ddc471b77ec75e5b2a43c803efac605f37b3 Mon Sep 17 00:00:00 2001
From: Alexander Graf <agraf@suse.de>
Date: Mon, 28 Jul 2014 19:29:13 +0200
Subject: KVM: PPC: Remove DCR handling

DCR handling was only needed for 440 KVM. Since we removed it, we can also
remove handling of DCR accesses.

Signed-off-by: Alexander Graf <agraf@suse.de>
---
 Documentation/virtual/kvm/api.txt   |  6 +++---
 arch/powerpc/include/asm/kvm_host.h |  4 ----
 arch/powerpc/include/asm/kvm_ppc.h  |  1 -
 arch/powerpc/kvm/booke.c            |  5 -----
 arch/powerpc/kvm/powerpc.c          | 10 ----------
 arch/powerpc/kvm/timing.c           |  1 -
 arch/powerpc/kvm/timing.h           |  3 ---
 include/uapi/linux/kvm.h            |  4 ++--
 8 files changed, 5 insertions(+), 29 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 8898caf8c090..a21ff2265c21 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -2613,8 +2613,8 @@ The 'data' member contains, in its first 'len' bytes, the value as it would
 appear if the VCPU performed a load or store of the appropriate width directly
 to the byte array.
 
-NOTE: For KVM_EXIT_IO, KVM_EXIT_MMIO, KVM_EXIT_OSI, KVM_EXIT_DCR,
-      KVM_EXIT_PAPR and KVM_EXIT_EPR the corresponding
+NOTE: For KVM_EXIT_IO, KVM_EXIT_MMIO, KVM_EXIT_OSI KVM_EXIT_PAPR and
+      KVM_EXIT_EPR the corresponding
 operations are complete (and guest state is consistent) only after userspace
 has re-entered the kernel with KVM_RUN.  The kernel side will first finish
 incomplete operations and then check for pending signals.  Userspace
@@ -2685,7 +2685,7 @@ Principles of Operation Book in the Chapter for Dynamic Address Translation
 			__u8  is_write;
 		} dcr;
 
-powerpc specific.
+Deprecated - was used for 440 KVM.
 
 		/* KVM_EXIT_OSI */
 		struct {
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 66f5b5914e6c..98d9dd50d063 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -94,7 +94,6 @@ struct kvm_vm_stat {
 struct kvm_vcpu_stat {
 	u32 sum_exits;
 	u32 mmio_exits;
-	u32 dcr_exits;
 	u32 signal_exits;
 	u32 light_exits;
 	/* Account for special types of light exits: */
@@ -126,7 +125,6 @@ struct kvm_vcpu_stat {
 
 enum kvm_exit_types {
 	MMIO_EXITS,
-	DCR_EXITS,
 	SIGNAL_EXITS,
 	ITLB_REAL_MISS_EXITS,
 	ITLB_VIRT_MISS_EXITS,
@@ -601,8 +599,6 @@ struct kvm_vcpu_arch {
 	u8 io_gpr; /* GPR used as IO source/target */
 	u8 mmio_is_bigendian;
 	u8 mmio_sign_extend;
-	u8 dcr_needed;
-	u8 dcr_is_write;
 	u8 osi_needed;
 	u8 osi_enabled;
 	u8 papr_enabled;
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index cbee4538307f..8e36c1e2c631 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -41,7 +41,6 @@
 enum emulation_result {
 	EMULATE_DONE,         /* no further processing */
 	EMULATE_DO_MMIO,      /* kvm_run filled with MMIO request */
-	EMULATE_DO_DCR,       /* kvm_run filled with DCR request */
 	EMULATE_FAIL,         /* can't emulate this instruction */
 	EMULATE_AGAIN,        /* something went wrong. go again */
 	EMULATE_EXIT_USER,    /* emulation requires exit to user-space */
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index f30948a17c03..b4c89fa6f109 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -51,7 +51,6 @@ unsigned long kvmppc_booke_handlers;
 
 struct kvm_stats_debugfs_item debugfs_entries[] = {
 	{ "mmio",       VCPU_STAT(mmio_exits) },
-	{ "dcr",        VCPU_STAT(dcr_exits) },
 	{ "sig",        VCPU_STAT(signal_exits) },
 	{ "itlb_r",     VCPU_STAT(itlb_real_miss_exits) },
 	{ "itlb_v",     VCPU_STAT(itlb_virt_miss_exits) },
@@ -709,10 +708,6 @@ static int emulation_exit(struct kvm_run *run, struct kvm_vcpu *vcpu)
 	case EMULATE_AGAIN:
 		return RESUME_GUEST;
 
-	case EMULATE_DO_DCR:
-		run->exit_reason = KVM_EXIT_DCR;
-		return RESUME_HOST;
-
 	case EMULATE_FAIL:
 		printk(KERN_CRIT "%s: emulation at %lx failed (%08x)\n",
 		       __func__, vcpu->arch.pc, vcpu->arch.last_inst);
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index c14ed15fd60b..288b4bb05cbd 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -743,12 +743,6 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
 #endif
 }
 
-static void kvmppc_complete_dcr_load(struct kvm_vcpu *vcpu,
-                                     struct kvm_run *run)
-{
-	kvmppc_set_gpr(vcpu, vcpu->arch.io_gpr, run->dcr.data);
-}
-
 static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu,
                                       struct kvm_run *run)
 {
@@ -945,10 +939,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
 		if (!vcpu->mmio_is_write)
 			kvmppc_complete_mmio_load(vcpu, run);
 		vcpu->mmio_needed = 0;
-	} else if (vcpu->arch.dcr_needed) {
-		if (!vcpu->arch.dcr_is_write)
-			kvmppc_complete_dcr_load(vcpu, run);
-		vcpu->arch.dcr_needed = 0;
 	} else if (vcpu->arch.osi_needed) {
 		u64 *gprs = run->osi.gprs;
 		int i;
diff --git a/arch/powerpc/kvm/timing.c b/arch/powerpc/kvm/timing.c
index 07b6110a4bb7..e44d2b2ea97e 100644
--- a/arch/powerpc/kvm/timing.c
+++ b/arch/powerpc/kvm/timing.c
@@ -110,7 +110,6 @@ void kvmppc_update_timing_stats(struct kvm_vcpu *vcpu)
 
 static const char *kvm_exit_names[__NUMBER_OF_KVM_EXIT_TYPES] = {
 	[MMIO_EXITS] =              "MMIO",
-	[DCR_EXITS] =               "DCR",
 	[SIGNAL_EXITS] =            "SIGNAL",
 	[ITLB_REAL_MISS_EXITS] =    "ITLBREAL",
 	[ITLB_VIRT_MISS_EXITS] =    "ITLBVIRT",
diff --git a/arch/powerpc/kvm/timing.h b/arch/powerpc/kvm/timing.h
index bf191e72b2d8..3123690c82dc 100644
--- a/arch/powerpc/kvm/timing.h
+++ b/arch/powerpc/kvm/timing.h
@@ -63,9 +63,6 @@ static inline void kvmppc_account_exit_stat(struct kvm_vcpu *vcpu, int type)
 	case EMULATED_INST_EXITS:
 		vcpu->stat.emulated_inst_exits++;
 		break;
-	case DCR_EXITS:
-		vcpu->stat.dcr_exits++;
-		break;
 	case DSI_EXITS:
 		vcpu->stat.dsi_exits++;
 		break;
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 51776cac6a9b..f6f24aeb9e1a 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -162,7 +162,7 @@ struct kvm_pit_config {
 #define KVM_EXIT_TPR_ACCESS       12
 #define KVM_EXIT_S390_SIEIC       13
 #define KVM_EXIT_S390_RESET       14
-#define KVM_EXIT_DCR              15
+#define KVM_EXIT_DCR              15 /* deprecated */
 #define KVM_EXIT_NMI              16
 #define KVM_EXIT_INTERNAL_ERROR   17
 #define KVM_EXIT_OSI              18
@@ -268,7 +268,7 @@ struct kvm_run {
 			__u64 trans_exc_code;
 			__u32 pgm_code;
 		} s390_ucontrol;
-		/* KVM_EXIT_DCR */
+		/* KVM_EXIT_DCR (deprecated) */
 		struct {
 			__u32 dcrn;
 			__u32 data;
-- 
cgit v1.2.3


From 68a360e82e55c9b35097e7be7f7991d8f401032f Mon Sep 17 00:00:00 2001
From: Willem de Bruijn <willemb@google.com>
Date: Fri, 25 Jul 2014 18:01:31 -0400
Subject: packet: remove deprecated syststamp timestamp

No device driver will ever return an skb_shared_info structure with
syststamp non-zero, so remove the branch that tests for this and
optionally marks the packet timestamp as TP_STATUS_TS_SYS_HARDWARE.

Do not remove the definition TP_STATUS_TS_SYS_HARDWARE, as processes
may refer to it.

Signed-off-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/packet_mmap.txt | 18 ++++++------------
 include/uapi/linux/if_packet.h           |  2 +-
 net/packet/af_packet.c                   | 12 ++++--------
 3 files changed, 11 insertions(+), 21 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/Documentation/networking/packet_mmap.txt b/Documentation/networking/packet_mmap.txt
index 38112d512f47..a6d7cb91069e 100644
--- a/Documentation/networking/packet_mmap.txt
+++ b/Documentation/networking/packet_mmap.txt
@@ -1008,14 +1008,9 @@ hardware timestamps to be used. Note: you may need to enable the generation
 of hardware timestamps with SIOCSHWTSTAMP (see related information from
 Documentation/networking/timestamping.txt).
 
-PACKET_TIMESTAMP accepts the same integer bit field as
-SO_TIMESTAMPING.  However, only the SOF_TIMESTAMPING_SYS_HARDWARE
-and SOF_TIMESTAMPING_RAW_HARDWARE values are recognized by
-PACKET_TIMESTAMP.  SOF_TIMESTAMPING_SYS_HARDWARE takes precedence over
-SOF_TIMESTAMPING_RAW_HARDWARE if both bits are set.
-
-    int req = 0;
-    req |= SOF_TIMESTAMPING_SYS_HARDWARE;
+PACKET_TIMESTAMP accepts the same integer bit field as SO_TIMESTAMPING:
+
+    int req = SOF_TIMESTAMPING_RAW_HARDWARE;
     setsockopt(fd, SOL_PACKET, PACKET_TIMESTAMP, (void *) &req, sizeof(req))
 
 For the mmap(2)ed ring buffers, such timestamps are stored in the
@@ -1023,14 +1018,13 @@ tpacket{,2,3}_hdr structure's tp_sec and tp_{n,u}sec members. To determine
 what kind of timestamp has been reported, the tp_status field is binary |'ed
 with the following possible bits ...
 
-    TP_STATUS_TS_SYS_HARDWARE
     TP_STATUS_TS_RAW_HARDWARE
     TP_STATUS_TS_SOFTWARE
 
 ... that are equivalent to its SOF_TIMESTAMPING_* counterparts. For the
-RX_RING, if none of those 3 are set (i.e. PACKET_TIMESTAMP is not set),
-then this means that a software fallback was invoked *within* PF_PACKET's
-processing code (less precise).
+RX_RING, if neither is set (i.e. PACKET_TIMESTAMP is not set), then a
+software fallback was invoked *within* PF_PACKET's processing code (less
+precise).
 
 Getting timestamps for the TX_RING works as follows: i) fill the ring frames,
 ii) call sendto() e.g. in blocking mode, iii) wait for status of relevant
diff --git a/include/uapi/linux/if_packet.h b/include/uapi/linux/if_packet.h
index bac27fa05f5b..da2d668b8cf1 100644
--- a/include/uapi/linux/if_packet.h
+++ b/include/uapi/linux/if_packet.h
@@ -108,7 +108,7 @@ struct tpacket_auxdata {
 
 /* Rx and Tx ring - header status */
 #define TP_STATUS_TS_SOFTWARE		(1 << 29)
-#define TP_STATUS_TS_SYS_HARDWARE	(1 << 30)
+#define TP_STATUS_TS_SYS_HARDWARE	(1 << 30) /* deprecated, never set */
 #define TP_STATUS_TS_RAW_HARDWARE	(1 << 31)
 
 /* Rx ring - feature request bits */
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 614ca91f785a..8d9f8042705a 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -441,14 +441,10 @@ static __u32 tpacket_get_timestamp(struct sk_buff *skb, struct timespec *ts,
 {
 	struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb);
 
-	if (shhwtstamps) {
-		if ((flags & SOF_TIMESTAMPING_SYS_HARDWARE) &&
-		    ktime_to_timespec_cond(shhwtstamps->syststamp, ts))
-			return TP_STATUS_TS_SYS_HARDWARE;
-		if ((flags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
-		    ktime_to_timespec_cond(shhwtstamps->hwtstamp, ts))
-			return TP_STATUS_TS_RAW_HARDWARE;
-	}
+	if (shhwtstamps &&
+	    (flags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
+	    ktime_to_timespec_cond(shhwtstamps->hwtstamp, ts))
+		return TP_STATUS_TS_RAW_HARDWARE;
 
 	if (ktime_to_timespec_cond(skb->tstamp, ts))
 		return TP_STATUS_TS_SOFTWARE;
-- 
cgit v1.2.3


From 16eecd9be4b05e3216b8b12707aa6f51fb197903 Mon Sep 17 00:00:00 2001
From: Anish Bhatt <anish@chelsio.com>
Date: Mon, 28 Jul 2014 20:57:07 -0700
Subject: dcbnl : Fix misleading dcb_app->priority explanation

Current explanation of dcb_app->priority is wrong. It says priority is
expected to be a 3-bit unsigned integer which is only true when working with
DCBx-IEEE. Use of dcb_app->priority by DCBx-CEE expects it to be 802.1p user
priority bitmap. Updated accordingly

This affects the cxgb4 driver, but I will post those changes as part of a
larger changeset shortly.

Fixes: 3e29027af4372 ("dcbnl: add support for ieee8021Qaz attributes")
Signed-off-by: Anish Bhatt <anish@chelsio.com>
Acked-by: John Fastabend <john.r.fastabend@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/dcbnl.h | 3 ++-
 net/dcb/dcbnl.c            | 5 +++--
 2 files changed, 5 insertions(+), 3 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/dcbnl.h b/include/uapi/linux/dcbnl.h
index 6bb43382f3f3..e711f20dc522 100644
--- a/include/uapi/linux/dcbnl.h
+++ b/include/uapi/linux/dcbnl.h
@@ -148,7 +148,8 @@ struct cee_pfc {
  *
  * @selector: protocol identifier type
  * @protocol: protocol of type indicated
- * @priority: 3-bit unsigned integer indicating priority
+ * @priority: 3-bit unsigned integer indicating priority for IEEE
+ *            8-bit 802.1p user priority bitmap for CEE
  *
  * ----
  *  Selector field values
diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c
index c34af7a1d2d4..ca11d283bbeb 100644
--- a/net/dcb/dcbnl.c
+++ b/net/dcb/dcbnl.c
@@ -1776,7 +1776,7 @@ EXPORT_SYMBOL(dcb_getapp);
  *
  * Priority 0 is an invalid priority in CEE spec. This routine
  * removes applications from the app list if the priority is
- * set to zero.
+ * set to zero. Priority is expected to be 8-bit 802.1p user priority bitmap
  */
 int dcb_setapp(struct net_device *dev, struct dcb_app *new)
 {
@@ -1837,7 +1837,8 @@ EXPORT_SYMBOL(dcb_ieee_getapp_mask);
  *
  * This adds Application data to the list. Multiple application
  * entries may exists for the same selector and protocol as long
- * as the priorities are different.
+ * as the priorities are different. Priority is expected to be a
+ * 3-bit unsigned integer
  */
 int dcb_ieee_setapp(struct net_device *dev, struct dcb_app *new)
 {
-- 
cgit v1.2.3


From e10038a8ec06ac819b7552bb67aaa6d2d6f850c1 Mon Sep 17 00:00:00 2001
From: Pablo Neira <pablo@netfilter.org>
Date: Tue, 29 Jul 2014 18:12:15 +0200
Subject: netfilter: xt_bpf: add mising opaque struct sk_filter definition

This structure is not exposed to userspace, so fix this by defining
struct sk_filter; so we skip the casting in kernelspace. This is safe
since userspace has no way to lurk with that internal pointer.

Fixes: e6f30c7 ("netfilter: x_tables: add xt_bpf match")
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Acked-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/netfilter/xt_bpf.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/netfilter/xt_bpf.h b/include/uapi/linux/netfilter/xt_bpf.h
index 5dda450eb55b..2ec9fbcd06f9 100644
--- a/include/uapi/linux/netfilter/xt_bpf.h
+++ b/include/uapi/linux/netfilter/xt_bpf.h
@@ -6,6 +6,8 @@
 
 #define XT_BPF_MAX_NUM_INSTR	64
 
+struct sk_filter;
+
 struct xt_bpf_info {
 	__u16 bpf_program_num_elem;
 	struct sock_filter bpf_program[XT_BPF_MAX_NUM_INSTR];
-- 
cgit v1.2.3


From 7ae457c1e5b45a1b826fad9d62b32191d2bdcfdb Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@plumgrid.com>
Date: Wed, 30 Jul 2014 20:34:16 -0700
Subject: net: filter: split 'struct sk_filter' into socket and bpf parts

clean up names related to socket filtering and bpf in the following way:
- everything that deals with sockets keeps 'sk_*' prefix
- everything that is pure BPF is changed to 'bpf_*' prefix

split 'struct sk_filter' into
struct sk_filter {
	atomic_t        refcnt;
	struct rcu_head rcu;
	struct bpf_prog *prog;
};
and
struct bpf_prog {
        u32                     jited:1,
                                len:31;
        struct sock_fprog_kern  *orig_prog;
        unsigned int            (*bpf_func)(const struct sk_buff *skb,
                                            const struct bpf_insn *filter);
        union {
                struct sock_filter      insns[0];
                struct bpf_insn         insnsi[0];
                struct work_struct      work;
        };
};
so that 'struct bpf_prog' can be used independent of sockets and cleans up
'unattached' bpf use cases

split SK_RUN_FILTER macro into:
    SK_RUN_FILTER to be used with 'struct sk_filter *' and
    BPF_PROG_RUN to be used with 'struct bpf_prog *'

__sk_filter_release(struct sk_filter *) gains
__bpf_prog_release(struct bpf_prog *) helper function

also perform related renames for the functions that work
with 'struct bpf_prog *', since they're on the same lines:

sk_filter_size -> bpf_prog_size
sk_filter_select_runtime -> bpf_prog_select_runtime
sk_filter_free -> bpf_prog_free
sk_unattached_filter_create -> bpf_prog_create
sk_unattached_filter_destroy -> bpf_prog_destroy
sk_store_orig_filter -> bpf_prog_store_orig_filter
sk_release_orig_filter -> bpf_release_orig_filter
__sk_migrate_filter -> bpf_migrate_filter
__sk_prepare_filter -> bpf_prepare_filter

API for attaching classic BPF to a socket stays the same:
sk_attach_filter(prog, struct sock *)/sk_detach_filter(struct sock *)
and SK_RUN_FILTER(struct sk_filter *, ctx) to execute a program
which is used by sockets, tun, af_packet

API for 'unattached' BPF programs becomes:
bpf_prog_create(struct bpf_prog **)/bpf_prog_destroy(struct bpf_prog *)
and BPF_PROG_RUN(struct bpf_prog *, ctx) to execute a program
which is used by isdn, ppp, team, seccomp, ptp, xt_bpf, cls_bpf, test_bpf

Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/filter.txt      | 10 ++--
 arch/arm/net/bpf_jit_32.c                |  8 +--
 arch/mips/net/bpf_jit.c                  |  8 +--
 arch/powerpc/net/bpf_jit_comp.c          |  8 +--
 arch/s390/net/bpf_jit_comp.c             |  4 +-
 arch/sparc/net/bpf_jit_comp.c            |  4 +-
 arch/x86/net/bpf_jit_comp.c              | 12 ++---
 drivers/isdn/i4l/isdn_ppp.c              | 26 +++++----
 drivers/net/ppp/ppp_generic.c            | 28 +++++-----
 drivers/net/team/team_mode_loadbalance.c | 14 ++---
 include/linux/filter.h                   | 40 ++++++++------
 include/linux/isdn_ppp.h                 |  4 +-
 include/uapi/linux/netfilter/xt_bpf.h    |  4 +-
 kernel/bpf/core.c                        | 30 +++++------
 kernel/seccomp.c                         | 10 ++--
 lib/test_bpf.c                           | 24 ++++-----
 net/core/filter.c                        | 92 ++++++++++++++++++--------------
 net/core/ptp_classifier.c                |  6 +--
 net/core/sock_diag.c                     |  2 +-
 net/netfilter/xt_bpf.c                   |  6 +--
 net/sched/cls_bpf.c                      | 12 ++---
 21 files changed, 183 insertions(+), 169 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/Documentation/networking/filter.txt b/Documentation/networking/filter.txt
index 712068be8171..c48a9704bda8 100644
--- a/Documentation/networking/filter.txt
+++ b/Documentation/networking/filter.txt
@@ -586,11 +586,11 @@ team driver's classifier for its load-balancing mode, netfilter's xt_bpf
 extension, PTP dissector/classifier, and much more. They are all internally
 converted by the kernel into the new instruction set representation and run
 in the eBPF interpreter. For in-kernel handlers, this all works transparently
-by using sk_unattached_filter_create() for setting up the filter, resp.
-sk_unattached_filter_destroy() for destroying it. The macro
-SK_RUN_FILTER(filter, ctx) transparently invokes eBPF interpreter or JITed
-code to run the filter. 'filter' is a pointer to struct sk_filter that we
-got from sk_unattached_filter_create(), and 'ctx' the given context (e.g.
+by using bpf_prog_create() for setting up the filter, resp.
+bpf_prog_destroy() for destroying it. The macro
+BPF_PROG_RUN(filter, ctx) transparently invokes eBPF interpreter or JITed
+code to run the filter. 'filter' is a pointer to struct bpf_prog that we
+got from bpf_prog_create(), and 'ctx' the given context (e.g.
 skb pointer). All constraints and restrictions from bpf_check_classic() apply
 before a conversion to the new layout is being done behind the scenes!
 
diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c
index fb5503ce016f..a37b989a2f91 100644
--- a/arch/arm/net/bpf_jit_32.c
+++ b/arch/arm/net/bpf_jit_32.c
@@ -56,7 +56,7 @@
 #define FLAG_NEED_X_RESET	(1 << 0)
 
 struct jit_ctx {
-	const struct sk_filter *skf;
+	const struct bpf_prog *skf;
 	unsigned idx;
 	unsigned prologue_bytes;
 	int ret0_fp_idx;
@@ -465,7 +465,7 @@ static inline void update_on_xread(struct jit_ctx *ctx)
 static int build_body(struct jit_ctx *ctx)
 {
 	void *load_func[] = {jit_get_skb_b, jit_get_skb_h, jit_get_skb_w};
-	const struct sk_filter *prog = ctx->skf;
+	const struct bpf_prog *prog = ctx->skf;
 	const struct sock_filter *inst;
 	unsigned i, load_order, off, condt;
 	int imm12;
@@ -857,7 +857,7 @@ b_epilogue:
 }
 
 
-void bpf_jit_compile(struct sk_filter *fp)
+void bpf_jit_compile(struct bpf_prog *fp)
 {
 	struct jit_ctx ctx;
 	unsigned tmp_idx;
@@ -926,7 +926,7 @@ out:
 	return;
 }
 
-void bpf_jit_free(struct sk_filter *fp)
+void bpf_jit_free(struct bpf_prog *fp)
 {
 	if (fp->jited)
 		module_free(NULL, fp->bpf_func);
diff --git a/arch/mips/net/bpf_jit.c b/arch/mips/net/bpf_jit.c
index b87390a56a2f..05a56619ece2 100644
--- a/arch/mips/net/bpf_jit.c
+++ b/arch/mips/net/bpf_jit.c
@@ -131,7 +131,7 @@
  * @target:		Memory location for the compiled filter
  */
 struct jit_ctx {
-	const struct sk_filter *skf;
+	const struct bpf_prog *skf;
 	unsigned int prologue_bytes;
 	u32 idx;
 	u32 flags;
@@ -789,7 +789,7 @@ static int pkt_type_offset(void)
 static int build_body(struct jit_ctx *ctx)
 {
 	void *load_func[] = {jit_get_skb_b, jit_get_skb_h, jit_get_skb_w};
-	const struct sk_filter *prog = ctx->skf;
+	const struct bpf_prog *prog = ctx->skf;
 	const struct sock_filter *inst;
 	unsigned int i, off, load_order, condt;
 	u32 k, b_off __maybe_unused;
@@ -1369,7 +1369,7 @@ jmp_cmp:
 
 int bpf_jit_enable __read_mostly;
 
-void bpf_jit_compile(struct sk_filter *fp)
+void bpf_jit_compile(struct bpf_prog *fp)
 {
 	struct jit_ctx ctx;
 	unsigned int alloc_size, tmp_idx;
@@ -1423,7 +1423,7 @@ out:
 	kfree(ctx.offsets);
 }
 
-void bpf_jit_free(struct sk_filter *fp)
+void bpf_jit_free(struct bpf_prog *fp)
 {
 	if (fp->jited)
 		module_free(NULL, fp->bpf_func);
diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
index 82e82cadcde5..3afa6f4c1957 100644
--- a/arch/powerpc/net/bpf_jit_comp.c
+++ b/arch/powerpc/net/bpf_jit_comp.c
@@ -25,7 +25,7 @@ static inline void bpf_flush_icache(void *start, void *end)
 	flush_icache_range((unsigned long)start, (unsigned long)end);
 }
 
-static void bpf_jit_build_prologue(struct sk_filter *fp, u32 *image,
+static void bpf_jit_build_prologue(struct bpf_prog *fp, u32 *image,
 				   struct codegen_context *ctx)
 {
 	int i;
@@ -121,7 +121,7 @@ static void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx)
 	((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative_offset : func) : func##_positive_offset)
 
 /* Assemble the body code between the prologue & epilogue. */
-static int bpf_jit_build_body(struct sk_filter *fp, u32 *image,
+static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
 			      struct codegen_context *ctx,
 			      unsigned int *addrs)
 {
@@ -569,7 +569,7 @@ static int bpf_jit_build_body(struct sk_filter *fp, u32 *image,
 	return 0;
 }
 
-void bpf_jit_compile(struct sk_filter *fp)
+void bpf_jit_compile(struct bpf_prog *fp)
 {
 	unsigned int proglen;
 	unsigned int alloclen;
@@ -693,7 +693,7 @@ out:
 	return;
 }
 
-void bpf_jit_free(struct sk_filter *fp)
+void bpf_jit_free(struct bpf_prog *fp)
 {
 	if (fp->jited)
 		module_free(NULL, fp->bpf_func);
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index a2cbd875543a..61e45b7c04d7 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -812,7 +812,7 @@ static struct bpf_binary_header *bpf_alloc_binary(unsigned int bpfsize,
 	return header;
 }
 
-void bpf_jit_compile(struct sk_filter *fp)
+void bpf_jit_compile(struct bpf_prog *fp)
 {
 	struct bpf_binary_header *header = NULL;
 	unsigned long size, prg_len, lit_len;
@@ -875,7 +875,7 @@ out:
 	kfree(addrs);
 }
 
-void bpf_jit_free(struct sk_filter *fp)
+void bpf_jit_free(struct bpf_prog *fp)
 {
 	unsigned long addr = (unsigned long)fp->bpf_func & PAGE_MASK;
 	struct bpf_binary_header *header = (void *)addr;
diff --git a/arch/sparc/net/bpf_jit_comp.c b/arch/sparc/net/bpf_jit_comp.c
index 892a102671ad..1f76c22a6a75 100644
--- a/arch/sparc/net/bpf_jit_comp.c
+++ b/arch/sparc/net/bpf_jit_comp.c
@@ -354,7 +354,7 @@ do {	*prog++ = BR_OPC | WDISP22(OFF);		\
  * emit_jump() calls with adjusted offsets.
  */
 
-void bpf_jit_compile(struct sk_filter *fp)
+void bpf_jit_compile(struct bpf_prog *fp)
 {
 	unsigned int cleanup_addr, proglen, oldproglen = 0;
 	u32 temp[8], *prog, *func, seen = 0, pass;
@@ -808,7 +808,7 @@ out:
 	return;
 }
 
-void bpf_jit_free(struct sk_filter *fp)
+void bpf_jit_free(struct bpf_prog *fp)
 {
 	if (fp->jited)
 		module_free(NULL, fp->bpf_func);
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index e2ecc1380b3d..5c8cb8043c5a 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -211,7 +211,7 @@ struct jit_context {
 	bool seen_ld_abs;
 };
 
-static int do_jit(struct sk_filter *bpf_prog, int *addrs, u8 *image,
+static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
 		  int oldproglen, struct jit_context *ctx)
 {
 	struct bpf_insn *insn = bpf_prog->insnsi;
@@ -841,7 +841,7 @@ common_load:		ctx->seen_ld_abs = true;
 			/* By design x64 JIT should support all BPF instructions
 			 * This error will be seen if new instruction was added
 			 * to interpreter, but not to JIT
-			 * or if there is junk in sk_filter
+			 * or if there is junk in bpf_prog
 			 */
 			pr_err("bpf_jit: unknown opcode %02x\n", insn->code);
 			return -EINVAL;
@@ -862,11 +862,11 @@ common_load:		ctx->seen_ld_abs = true;
 	return proglen;
 }
 
-void bpf_jit_compile(struct sk_filter *prog)
+void bpf_jit_compile(struct bpf_prog *prog)
 {
 }
 
-void bpf_int_jit_compile(struct sk_filter *prog)
+void bpf_int_jit_compile(struct bpf_prog *prog)
 {
 	struct bpf_binary_header *header = NULL;
 	int proglen, oldproglen = 0;
@@ -932,7 +932,7 @@ out:
 
 static void bpf_jit_free_deferred(struct work_struct *work)
 {
-	struct sk_filter *fp = container_of(work, struct sk_filter, work);
+	struct bpf_prog *fp = container_of(work, struct bpf_prog, work);
 	unsigned long addr = (unsigned long)fp->bpf_func & PAGE_MASK;
 	struct bpf_binary_header *header = (void *)addr;
 
@@ -941,7 +941,7 @@ static void bpf_jit_free_deferred(struct work_struct *work)
 	kfree(fp);
 }
 
-void bpf_jit_free(struct sk_filter *fp)
+void bpf_jit_free(struct bpf_prog *fp)
 {
 	if (fp->jited) {
 		INIT_WORK(&fp->work, bpf_jit_free_deferred);
diff --git a/drivers/isdn/i4l/isdn_ppp.c b/drivers/isdn/i4l/isdn_ppp.c
index 62f0688d45a5..c4198fa490bf 100644
--- a/drivers/isdn/i4l/isdn_ppp.c
+++ b/drivers/isdn/i4l/isdn_ppp.c
@@ -379,12 +379,12 @@ isdn_ppp_release(int min, struct file *file)
 #endif
 #ifdef CONFIG_IPPP_FILTER
 	if (is->pass_filter) {
-		sk_unattached_filter_destroy(is->pass_filter);
+		bpf_prog_destroy(is->pass_filter);
 		is->pass_filter = NULL;
 	}
 
 	if (is->active_filter) {
-		sk_unattached_filter_destroy(is->active_filter);
+		bpf_prog_destroy(is->active_filter);
 		is->active_filter = NULL;
 	}
 #endif
@@ -639,12 +639,11 @@ isdn_ppp_ioctl(int min, struct file *file, unsigned int cmd, unsigned long arg)
 		fprog.filter = code;
 
 		if (is->pass_filter) {
-			sk_unattached_filter_destroy(is->pass_filter);
+			bpf_prog_destroy(is->pass_filter);
 			is->pass_filter = NULL;
 		}
 		if (fprog.filter != NULL)
-			err = sk_unattached_filter_create(&is->pass_filter,
-							  &fprog);
+			err = bpf_prog_create(&is->pass_filter, &fprog);
 		else
 			err = 0;
 		kfree(code);
@@ -664,12 +663,11 @@ isdn_ppp_ioctl(int min, struct file *file, unsigned int cmd, unsigned long arg)
 		fprog.filter = code;
 
 		if (is->active_filter) {
-			sk_unattached_filter_destroy(is->active_filter);
+			bpf_prog_destroy(is->active_filter);
 			is->active_filter = NULL;
 		}
 		if (fprog.filter != NULL)
-			err = sk_unattached_filter_create(&is->active_filter,
-							  &fprog);
+			err = bpf_prog_create(&is->active_filter, &fprog);
 		else
 			err = 0;
 		kfree(code);
@@ -1174,14 +1172,14 @@ isdn_ppp_push_higher(isdn_net_dev *net_dev, isdn_net_local *lp, struct sk_buff *
 	}
 
 	if (is->pass_filter
-	    && SK_RUN_FILTER(is->pass_filter, skb) == 0) {
+	    && BPF_PROG_RUN(is->pass_filter, skb) == 0) {
 		if (is->debug & 0x2)
 			printk(KERN_DEBUG "IPPP: inbound frame filtered.\n");
 		kfree_skb(skb);
 		return;
 	}
 	if (!(is->active_filter
-	      && SK_RUN_FILTER(is->active_filter, skb) == 0)) {
+	      && BPF_PROG_RUN(is->active_filter, skb) == 0)) {
 		if (is->debug & 0x2)
 			printk(KERN_DEBUG "IPPP: link-active filter: resetting huptimer.\n");
 		lp->huptimer = 0;
@@ -1320,14 +1318,14 @@ isdn_ppp_xmit(struct sk_buff *skb, struct net_device *netdev)
 	}
 
 	if (ipt->pass_filter
-	    && SK_RUN_FILTER(ipt->pass_filter, skb) == 0) {
+	    && BPF_PROG_RUN(ipt->pass_filter, skb) == 0) {
 		if (ipt->debug & 0x4)
 			printk(KERN_DEBUG "IPPP: outbound frame filtered.\n");
 		kfree_skb(skb);
 		goto unlock;
 	}
 	if (!(ipt->active_filter
-	      && SK_RUN_FILTER(ipt->active_filter, skb) == 0)) {
+	      && BPF_PROG_RUN(ipt->active_filter, skb) == 0)) {
 		if (ipt->debug & 0x4)
 			printk(KERN_DEBUG "IPPP: link-active filter: resetting huptimer.\n");
 		lp->huptimer = 0;
@@ -1517,9 +1515,9 @@ int isdn_ppp_autodial_filter(struct sk_buff *skb, isdn_net_local *lp)
 	}
 
 	drop |= is->pass_filter
-		&& SK_RUN_FILTER(is->pass_filter, skb) == 0;
+		&& BPF_PROG_RUN(is->pass_filter, skb) == 0;
 	drop |= is->active_filter
-		&& SK_RUN_FILTER(is->active_filter, skb) == 0;
+		&& BPF_PROG_RUN(is->active_filter, skb) == 0;
 
 	skb_push(skb, IPPP_MAX_HEADER - 4);
 	return drop;
diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c
index 765248b42a0a..fa0d71727894 100644
--- a/drivers/net/ppp/ppp_generic.c
+++ b/drivers/net/ppp/ppp_generic.c
@@ -143,8 +143,8 @@ struct ppp {
 	struct sk_buff_head mrq;	/* MP: receive reconstruction queue */
 #endif /* CONFIG_PPP_MULTILINK */
 #ifdef CONFIG_PPP_FILTER
-	struct sk_filter *pass_filter;	/* filter for packets to pass */
-	struct sk_filter *active_filter;/* filter for pkts to reset idle */
+	struct bpf_prog *pass_filter;	/* filter for packets to pass */
+	struct bpf_prog *active_filter; /* filter for pkts to reset idle */
 #endif /* CONFIG_PPP_FILTER */
 	struct net	*ppp_net;	/* the net we belong to */
 	struct ppp_link_stats stats64;	/* 64 bit network stats */
@@ -762,12 +762,12 @@ static long ppp_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 
 			ppp_lock(ppp);
 			if (ppp->pass_filter) {
-				sk_unattached_filter_destroy(ppp->pass_filter);
+				bpf_prog_destroy(ppp->pass_filter);
 				ppp->pass_filter = NULL;
 			}
 			if (fprog.filter != NULL)
-				err = sk_unattached_filter_create(&ppp->pass_filter,
-								  &fprog);
+				err = bpf_prog_create(&ppp->pass_filter,
+						      &fprog);
 			else
 				err = 0;
 			kfree(code);
@@ -788,12 +788,12 @@ static long ppp_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 
 			ppp_lock(ppp);
 			if (ppp->active_filter) {
-				sk_unattached_filter_destroy(ppp->active_filter);
+				bpf_prog_destroy(ppp->active_filter);
 				ppp->active_filter = NULL;
 			}
 			if (fprog.filter != NULL)
-				err = sk_unattached_filter_create(&ppp->active_filter,
-								  &fprog);
+				err = bpf_prog_create(&ppp->active_filter,
+						      &fprog);
 			else
 				err = 0;
 			kfree(code);
@@ -1205,7 +1205,7 @@ ppp_send_frame(struct ppp *ppp, struct sk_buff *skb)
 		   a four-byte PPP header on each packet */
 		*skb_push(skb, 2) = 1;
 		if (ppp->pass_filter &&
-		    SK_RUN_FILTER(ppp->pass_filter, skb) == 0) {
+		    BPF_PROG_RUN(ppp->pass_filter, skb) == 0) {
 			if (ppp->debug & 1)
 				netdev_printk(KERN_DEBUG, ppp->dev,
 					      "PPP: outbound frame "
@@ -1215,7 +1215,7 @@ ppp_send_frame(struct ppp *ppp, struct sk_buff *skb)
 		}
 		/* if this packet passes the active filter, record the time */
 		if (!(ppp->active_filter &&
-		      SK_RUN_FILTER(ppp->active_filter, skb) == 0))
+		      BPF_PROG_RUN(ppp->active_filter, skb) == 0))
 			ppp->last_xmit = jiffies;
 		skb_pull(skb, 2);
 #else
@@ -1839,7 +1839,7 @@ ppp_receive_nonmp_frame(struct ppp *ppp, struct sk_buff *skb)
 
 			*skb_push(skb, 2) = 0;
 			if (ppp->pass_filter &&
-			    SK_RUN_FILTER(ppp->pass_filter, skb) == 0) {
+			    BPF_PROG_RUN(ppp->pass_filter, skb) == 0) {
 				if (ppp->debug & 1)
 					netdev_printk(KERN_DEBUG, ppp->dev,
 						      "PPP: inbound frame "
@@ -1848,7 +1848,7 @@ ppp_receive_nonmp_frame(struct ppp *ppp, struct sk_buff *skb)
 				return;
 			}
 			if (!(ppp->active_filter &&
-			      SK_RUN_FILTER(ppp->active_filter, skb) == 0))
+			      BPF_PROG_RUN(ppp->active_filter, skb) == 0))
 				ppp->last_recv = jiffies;
 			__skb_pull(skb, 2);
 		} else
@@ -2829,12 +2829,12 @@ static void ppp_destroy_interface(struct ppp *ppp)
 #endif /* CONFIG_PPP_MULTILINK */
 #ifdef CONFIG_PPP_FILTER
 	if (ppp->pass_filter) {
-		sk_unattached_filter_destroy(ppp->pass_filter);
+		bpf_prog_destroy(ppp->pass_filter);
 		ppp->pass_filter = NULL;
 	}
 
 	if (ppp->active_filter) {
-		sk_unattached_filter_destroy(ppp->active_filter);
+		bpf_prog_destroy(ppp->active_filter);
 		ppp->active_filter = NULL;
 	}
 #endif /* CONFIG_PPP_FILTER */
diff --git a/drivers/net/team/team_mode_loadbalance.c b/drivers/net/team/team_mode_loadbalance.c
index d7be9b36bce6..a1536d0d83a9 100644
--- a/drivers/net/team/team_mode_loadbalance.c
+++ b/drivers/net/team/team_mode_loadbalance.c
@@ -58,7 +58,7 @@ struct lb_priv_ex {
 };
 
 struct lb_priv {
-	struct sk_filter __rcu *fp;
+	struct bpf_prog __rcu *fp;
 	lb_select_tx_port_func_t __rcu *select_tx_port_func;
 	struct lb_pcpu_stats __percpu *pcpu_stats;
 	struct lb_priv_ex *ex; /* priv extension */
@@ -174,14 +174,14 @@ static lb_select_tx_port_func_t *lb_select_tx_port_get_func(const char *name)
 static unsigned int lb_get_skb_hash(struct lb_priv *lb_priv,
 				    struct sk_buff *skb)
 {
-	struct sk_filter *fp;
+	struct bpf_prog *fp;
 	uint32_t lhash;
 	unsigned char *c;
 
 	fp = rcu_dereference_bh(lb_priv->fp);
 	if (unlikely(!fp))
 		return 0;
-	lhash = SK_RUN_FILTER(fp, skb);
+	lhash = BPF_PROG_RUN(fp, skb);
 	c = (char *) &lhash;
 	return c[0] ^ c[1] ^ c[2] ^ c[3];
 }
@@ -271,8 +271,8 @@ static void __fprog_destroy(struct sock_fprog_kern *fprog)
 static int lb_bpf_func_set(struct team *team, struct team_gsetter_ctx *ctx)
 {
 	struct lb_priv *lb_priv = get_lb_priv(team);
-	struct sk_filter *fp = NULL;
-	struct sk_filter *orig_fp = NULL;
+	struct bpf_prog *fp = NULL;
+	struct bpf_prog *orig_fp = NULL;
 	struct sock_fprog_kern *fprog = NULL;
 	int err;
 
@@ -281,7 +281,7 @@ static int lb_bpf_func_set(struct team *team, struct team_gsetter_ctx *ctx)
 				     ctx->data.bin_val.ptr);
 		if (err)
 			return err;
-		err = sk_unattached_filter_create(&fp, fprog);
+		err = bpf_prog_create(&fp, fprog);
 		if (err) {
 			__fprog_destroy(fprog);
 			return err;
@@ -300,7 +300,7 @@ static int lb_bpf_func_set(struct team *team, struct team_gsetter_ctx *ctx)
 
 	if (orig_fp) {
 		synchronize_rcu();
-		sk_unattached_filter_destroy(orig_fp);
+		bpf_prog_destroy(orig_fp);
 	}
 	return 0;
 }
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 7cb9b40e9a2f..a5227ab8ccb1 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -296,7 +296,8 @@ enum {
 })
 
 /* Macro to invoke filter function. */
-#define SK_RUN_FILTER(filter, ctx)  (*filter->bpf_func)(ctx, filter->insnsi)
+#define SK_RUN_FILTER(filter, ctx) \
+	(*filter->prog->bpf_func)(ctx, filter->prog->insnsi)
 
 struct bpf_insn {
 	__u8	code;		/* opcode */
@@ -323,12 +324,10 @@ struct sk_buff;
 struct sock;
 struct seccomp_data;
 
-struct sk_filter {
-	atomic_t		refcnt;
+struct bpf_prog {
 	u32			jited:1,	/* Is our filter JIT'ed? */
 				len:31;		/* Number of filter blocks */
 	struct sock_fprog_kern	*orig_prog;	/* Original BPF program */
-	struct rcu_head		rcu;
 	unsigned int		(*bpf_func)(const struct sk_buff *skb,
 					    const struct bpf_insn *filter);
 	union {
@@ -338,25 +337,32 @@ struct sk_filter {
 	};
 };
 
-static inline unsigned int sk_filter_size(unsigned int proglen)
+struct sk_filter {
+	atomic_t	refcnt;
+	struct rcu_head	rcu;
+	struct bpf_prog	*prog;
+};
+
+#define BPF_PROG_RUN(filter, ctx)  (*filter->bpf_func)(ctx, filter->insnsi)
+
+static inline unsigned int bpf_prog_size(unsigned int proglen)
 {
-	return max(sizeof(struct sk_filter),
-		   offsetof(struct sk_filter, insns[proglen]));
+	return max(sizeof(struct bpf_prog),
+		   offsetof(struct bpf_prog, insns[proglen]));
 }
 
 #define bpf_classic_proglen(fprog) (fprog->len * sizeof(fprog->filter[0]))
 
 int sk_filter(struct sock *sk, struct sk_buff *skb);
 
-void sk_filter_select_runtime(struct sk_filter *fp);
-void sk_filter_free(struct sk_filter *fp);
+void bpf_prog_select_runtime(struct bpf_prog *fp);
+void bpf_prog_free(struct bpf_prog *fp);
 
 int bpf_convert_filter(struct sock_filter *prog, int len,
 		       struct bpf_insn *new_prog, int *new_len);
 
-int sk_unattached_filter_create(struct sk_filter **pfp,
-				struct sock_fprog_kern *fprog);
-void sk_unattached_filter_destroy(struct sk_filter *fp);
+int bpf_prog_create(struct bpf_prog **pfp, struct sock_fprog_kern *fprog);
+void bpf_prog_destroy(struct bpf_prog *fp);
 
 int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk);
 int sk_detach_filter(struct sock *sk);
@@ -369,7 +375,7 @@ bool sk_filter_charge(struct sock *sk, struct sk_filter *fp);
 void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp);
 
 u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
-void bpf_int_jit_compile(struct sk_filter *fp);
+void bpf_int_jit_compile(struct bpf_prog *fp);
 
 #define BPF_ANC		BIT(15)
 
@@ -423,8 +429,8 @@ static inline void *bpf_load_pointer(const struct sk_buff *skb, int k,
 #include <linux/linkage.h>
 #include <linux/printk.h>
 
-void bpf_jit_compile(struct sk_filter *fp);
-void bpf_jit_free(struct sk_filter *fp);
+void bpf_jit_compile(struct bpf_prog *fp);
+void bpf_jit_free(struct bpf_prog *fp);
 
 static inline void bpf_jit_dump(unsigned int flen, unsigned int proglen,
 				u32 pass, void *image)
@@ -438,11 +444,11 @@ static inline void bpf_jit_dump(unsigned int flen, unsigned int proglen,
 #else
 #include <linux/slab.h>
 
-static inline void bpf_jit_compile(struct sk_filter *fp)
+static inline void bpf_jit_compile(struct bpf_prog *fp)
 {
 }
 
-static inline void bpf_jit_free(struct sk_filter *fp)
+static inline void bpf_jit_free(struct bpf_prog *fp)
 {
 	kfree(fp);
 }
diff --git a/include/linux/isdn_ppp.h b/include/linux/isdn_ppp.h
index 8e10f57f109f..a0070c6dfaf8 100644
--- a/include/linux/isdn_ppp.h
+++ b/include/linux/isdn_ppp.h
@@ -180,8 +180,8 @@ struct ippp_struct {
   struct slcompress *slcomp;
 #endif
 #ifdef CONFIG_IPPP_FILTER
-  struct sk_filter *pass_filter;   /* filter for packets to pass */
-  struct sk_filter *active_filter; /* filter for pkts to reset idle */
+  struct bpf_prog *pass_filter;   /* filter for packets to pass */
+  struct bpf_prog *active_filter; /* filter for pkts to reset idle */
 #endif
   unsigned long debug;
   struct isdn_ppp_compressor *compressor,*decompressor;
diff --git a/include/uapi/linux/netfilter/xt_bpf.h b/include/uapi/linux/netfilter/xt_bpf.h
index 2ec9fbcd06f9..1fad2c27ac32 100644
--- a/include/uapi/linux/netfilter/xt_bpf.h
+++ b/include/uapi/linux/netfilter/xt_bpf.h
@@ -6,14 +6,14 @@
 
 #define XT_BPF_MAX_NUM_INSTR	64
 
-struct sk_filter;
+struct bpf_prog;
 
 struct xt_bpf_info {
 	__u16 bpf_program_num_elem;
 	struct sock_filter bpf_program[XT_BPF_MAX_NUM_INSTR];
 
 	/* only used in the kernel */
-	struct sk_filter *filter __attribute__((aligned(8)));
+	struct bpf_prog *filter __attribute__((aligned(8)));
 };
 
 #endif /*_XT_BPF_H */
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 188ac5ba3900..7f0dbcbb34af 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -73,15 +73,13 @@ noinline u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
 }
 
 /**
- *	__sk_run_filter - run a filter on a given context
- *	@ctx: buffer to run the filter on
- *	@insn: filter to apply
+ *	__bpf_prog_run - run eBPF program on a given context
+ *	@ctx: is the data we are operating on
+ *	@insn: is the array of eBPF instructions
  *
- * Decode and apply filter instructions to the skb->data. Return length to
- * keep, 0 for none. @ctx is the data we are operating on, @insn is the
- * array of filter instructions.
+ * Decode and execute eBPF instructions.
  */
-static unsigned int __sk_run_filter(void *ctx, const struct bpf_insn *insn)
+static unsigned int __bpf_prog_run(void *ctx, const struct bpf_insn *insn)
 {
 	u64 stack[MAX_BPF_STACK / sizeof(u64)];
 	u64 regs[MAX_BPF_REG], tmp;
@@ -508,29 +506,29 @@ load_byte:
 		return 0;
 }
 
-void __weak bpf_int_jit_compile(struct sk_filter *prog)
+void __weak bpf_int_jit_compile(struct bpf_prog *prog)
 {
 }
 
 /**
- *	sk_filter_select_runtime - select execution runtime for BPF program
- *	@fp: sk_filter populated with internal BPF program
+ *	bpf_prog_select_runtime - select execution runtime for BPF program
+ *	@fp: bpf_prog populated with internal BPF program
  *
  * try to JIT internal BPF program, if JIT is not available select interpreter
- * BPF program will be executed via SK_RUN_FILTER() macro
+ * BPF program will be executed via BPF_PROG_RUN() macro
  */
-void sk_filter_select_runtime(struct sk_filter *fp)
+void bpf_prog_select_runtime(struct bpf_prog *fp)
 {
-	fp->bpf_func = (void *) __sk_run_filter;
+	fp->bpf_func = (void *) __bpf_prog_run;
 
 	/* Probe if internal BPF can be JITed */
 	bpf_int_jit_compile(fp);
 }
-EXPORT_SYMBOL_GPL(sk_filter_select_runtime);
+EXPORT_SYMBOL_GPL(bpf_prog_select_runtime);
 
 /* free internal BPF program */
-void sk_filter_free(struct sk_filter *fp)
+void bpf_prog_free(struct bpf_prog *fp)
 {
 	bpf_jit_free(fp);
 }
-EXPORT_SYMBOL_GPL(sk_filter_free);
+EXPORT_SYMBOL_GPL(bpf_prog_free);
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index 33a3a97e2b58..2f3fa2cc2eac 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -54,7 +54,7 @@
 struct seccomp_filter {
 	atomic_t usage;
 	struct seccomp_filter *prev;
-	struct sk_filter *prog;
+	struct bpf_prog *prog;
 };
 
 /* Limit any path through the tree to 256KB worth of instructions. */
@@ -187,7 +187,7 @@ static u32 seccomp_run_filters(int syscall)
 	 * value always takes priority (ignoring the DATA).
 	 */
 	for (f = current->seccomp.filter; f; f = f->prev) {
-		u32 cur_ret = SK_RUN_FILTER(f->prog, (void *)&sd);
+		u32 cur_ret = BPF_PROG_RUN(f->prog, (void *)&sd);
 
 		if ((cur_ret & SECCOMP_RET_ACTION) < (ret & SECCOMP_RET_ACTION))
 			ret = cur_ret;
@@ -260,7 +260,7 @@ static long seccomp_attach_filter(struct sock_fprog *fprog)
 	if (!filter)
 		goto free_prog;
 
-	filter->prog = kzalloc(sk_filter_size(new_len),
+	filter->prog = kzalloc(bpf_prog_size(new_len),
 			       GFP_KERNEL|__GFP_NOWARN);
 	if (!filter->prog)
 		goto free_filter;
@@ -273,7 +273,7 @@ static long seccomp_attach_filter(struct sock_fprog *fprog)
 	atomic_set(&filter->usage, 1);
 	filter->prog->len = new_len;
 
-	sk_filter_select_runtime(filter->prog);
+	bpf_prog_select_runtime(filter->prog);
 
 	/*
 	 * If there is an existing filter, make it the prev and don't drop its
@@ -337,7 +337,7 @@ void put_seccomp_filter(struct task_struct *tsk)
 	while (orig && atomic_dec_and_test(&orig->usage)) {
 		struct seccomp_filter *freeme = orig;
 		orig = orig->prev;
-		sk_filter_free(freeme->prog);
+		bpf_prog_free(freeme->prog);
 		kfree(freeme);
 	}
 }
diff --git a/lib/test_bpf.c b/lib/test_bpf.c
index 5f48623ee1a7..89e0345733bd 100644
--- a/lib/test_bpf.c
+++ b/lib/test_bpf.c
@@ -1761,9 +1761,9 @@ static int probe_filter_length(struct sock_filter *fp)
 	return len + 1;
 }
 
-static struct sk_filter *generate_filter(int which, int *err)
+static struct bpf_prog *generate_filter(int which, int *err)
 {
-	struct sk_filter *fp;
+	struct bpf_prog *fp;
 	struct sock_fprog_kern fprog;
 	unsigned int flen = probe_filter_length(tests[which].u.insns);
 	__u8 test_type = tests[which].aux & TEST_TYPE_MASK;
@@ -1773,7 +1773,7 @@ static struct sk_filter *generate_filter(int which, int *err)
 		fprog.filter = tests[which].u.insns;
 		fprog.len = flen;
 
-		*err = sk_unattached_filter_create(&fp, &fprog);
+		*err = bpf_prog_create(&fp, &fprog);
 		if (tests[which].aux & FLAG_EXPECTED_FAIL) {
 			if (*err == -EINVAL) {
 				pr_cont("PASS\n");
@@ -1798,7 +1798,7 @@ static struct sk_filter *generate_filter(int which, int *err)
 		break;
 
 	case INTERNAL:
-		fp = kzalloc(sk_filter_size(flen), GFP_KERNEL);
+		fp = kzalloc(bpf_prog_size(flen), GFP_KERNEL);
 		if (fp == NULL) {
 			pr_cont("UNEXPECTED_FAIL no memory left\n");
 			*err = -ENOMEM;
@@ -1809,7 +1809,7 @@ static struct sk_filter *generate_filter(int which, int *err)
 		memcpy(fp->insnsi, tests[which].u.insns_int,
 		       fp->len * sizeof(struct bpf_insn));
 
-		sk_filter_select_runtime(fp);
+		bpf_prog_select_runtime(fp);
 		break;
 	}
 
@@ -1817,21 +1817,21 @@ static struct sk_filter *generate_filter(int which, int *err)
 	return fp;
 }
 
-static void release_filter(struct sk_filter *fp, int which)
+static void release_filter(struct bpf_prog *fp, int which)
 {
 	__u8 test_type = tests[which].aux & TEST_TYPE_MASK;
 
 	switch (test_type) {
 	case CLASSIC:
-		sk_unattached_filter_destroy(fp);
+		bpf_prog_destroy(fp);
 		break;
 	case INTERNAL:
-		sk_filter_free(fp);
+		bpf_prog_free(fp);
 		break;
 	}
 }
 
-static int __run_one(const struct sk_filter *fp, const void *data,
+static int __run_one(const struct bpf_prog *fp, const void *data,
 		     int runs, u64 *duration)
 {
 	u64 start, finish;
@@ -1840,7 +1840,7 @@ static int __run_one(const struct sk_filter *fp, const void *data,
 	start = ktime_to_us(ktime_get());
 
 	for (i = 0; i < runs; i++)
-		ret = SK_RUN_FILTER(fp, data);
+		ret = BPF_PROG_RUN(fp, data);
 
 	finish = ktime_to_us(ktime_get());
 
@@ -1850,7 +1850,7 @@ static int __run_one(const struct sk_filter *fp, const void *data,
 	return ret;
 }
 
-static int run_one(const struct sk_filter *fp, struct bpf_test *test)
+static int run_one(const struct bpf_prog *fp, struct bpf_test *test)
 {
 	int err_cnt = 0, i, runs = MAX_TESTRUNS;
 
@@ -1884,7 +1884,7 @@ static __init int test_bpf(void)
 	int i, err_cnt = 0, pass_cnt = 0;
 
 	for (i = 0; i < ARRAY_SIZE(tests); i++) {
-		struct sk_filter *fp;
+		struct bpf_prog *fp;
 		int err;
 
 		pr_info("#%d %s ", i, tests[i].descr);
diff --git a/net/core/filter.c b/net/core/filter.c
index 6ac901613bee..d814b8a89d0f 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -810,8 +810,8 @@ int bpf_check_classic(const struct sock_filter *filter, unsigned int flen)
 }
 EXPORT_SYMBOL(bpf_check_classic);
 
-static int sk_store_orig_filter(struct sk_filter *fp,
-				const struct sock_fprog *fprog)
+static int bpf_prog_store_orig_filter(struct bpf_prog *fp,
+				      const struct sock_fprog *fprog)
 {
 	unsigned int fsize = bpf_classic_proglen(fprog);
 	struct sock_fprog_kern *fkprog;
@@ -831,7 +831,7 @@ static int sk_store_orig_filter(struct sk_filter *fp,
 	return 0;
 }
 
-static void sk_release_orig_filter(struct sk_filter *fp)
+static void bpf_release_orig_filter(struct bpf_prog *fp)
 {
 	struct sock_fprog_kern *fprog = fp->orig_prog;
 
@@ -841,10 +841,16 @@ static void sk_release_orig_filter(struct sk_filter *fp)
 	}
 }
 
+static void __bpf_prog_release(struct bpf_prog *prog)
+{
+	bpf_release_orig_filter(prog);
+	bpf_prog_free(prog);
+}
+
 static void __sk_filter_release(struct sk_filter *fp)
 {
-	sk_release_orig_filter(fp);
-	sk_filter_free(fp);
+	__bpf_prog_release(fp->prog);
+	kfree(fp);
 }
 
 /**
@@ -872,7 +878,7 @@ static void sk_filter_release(struct sk_filter *fp)
 
 void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp)
 {
-	u32 filter_size = sk_filter_size(fp->len);
+	u32 filter_size = bpf_prog_size(fp->prog->len);
 
 	atomic_sub(filter_size, &sk->sk_omem_alloc);
 	sk_filter_release(fp);
@@ -883,7 +889,7 @@ void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp)
  */
 bool sk_filter_charge(struct sock *sk, struct sk_filter *fp)
 {
-	u32 filter_size = sk_filter_size(fp->len);
+	u32 filter_size = bpf_prog_size(fp->prog->len);
 
 	/* same check as in sock_kmalloc() */
 	if (filter_size <= sysctl_optmem_max &&
@@ -895,10 +901,10 @@ bool sk_filter_charge(struct sock *sk, struct sk_filter *fp)
 	return false;
 }
 
-static struct sk_filter *__sk_migrate_filter(struct sk_filter *fp)
+static struct bpf_prog *bpf_migrate_filter(struct bpf_prog *fp)
 {
 	struct sock_filter *old_prog;
-	struct sk_filter *old_fp;
+	struct bpf_prog *old_fp;
 	int err, new_len, old_len = fp->len;
 
 	/* We are free to overwrite insns et al right here as it
@@ -927,7 +933,7 @@ static struct sk_filter *__sk_migrate_filter(struct sk_filter *fp)
 
 	/* Expand fp for appending the new filter representation. */
 	old_fp = fp;
-	fp = krealloc(old_fp, sk_filter_size(new_len), GFP_KERNEL);
+	fp = krealloc(old_fp, bpf_prog_size(new_len), GFP_KERNEL);
 	if (!fp) {
 		/* The old_fp is still around in case we couldn't
 		 * allocate new memory, so uncharge on that one.
@@ -949,7 +955,7 @@ static struct sk_filter *__sk_migrate_filter(struct sk_filter *fp)
 		 */
 		goto out_err_free;
 
-	sk_filter_select_runtime(fp);
+	bpf_prog_select_runtime(fp);
 
 	kfree(old_prog);
 	return fp;
@@ -957,11 +963,11 @@ static struct sk_filter *__sk_migrate_filter(struct sk_filter *fp)
 out_err_free:
 	kfree(old_prog);
 out_err:
-	__sk_filter_release(fp);
+	__bpf_prog_release(fp);
 	return ERR_PTR(err);
 }
 
-static struct sk_filter *__sk_prepare_filter(struct sk_filter *fp)
+static struct bpf_prog *bpf_prepare_filter(struct bpf_prog *fp)
 {
 	int err;
 
@@ -970,7 +976,7 @@ static struct sk_filter *__sk_prepare_filter(struct sk_filter *fp)
 
 	err = bpf_check_classic(fp->insns, fp->len);
 	if (err) {
-		__sk_filter_release(fp);
+		__bpf_prog_release(fp);
 		return ERR_PTR(err);
 	}
 
@@ -983,13 +989,13 @@ static struct sk_filter *__sk_prepare_filter(struct sk_filter *fp)
 	 * internal BPF translation for the optimized interpreter.
 	 */
 	if (!fp->jited)
-		fp = __sk_migrate_filter(fp);
+		fp = bpf_migrate_filter(fp);
 
 	return fp;
 }
 
 /**
- *	sk_unattached_filter_create - create an unattached filter
+ *	bpf_prog_create - create an unattached filter
  *	@pfp: the unattached filter that is created
  *	@fprog: the filter program
  *
@@ -998,23 +1004,21 @@ static struct sk_filter *__sk_prepare_filter(struct sk_filter *fp)
  * If an error occurs or there is insufficient memory for the filter
  * a negative errno code is returned. On success the return is zero.
  */
-int sk_unattached_filter_create(struct sk_filter **pfp,
-				struct sock_fprog_kern *fprog)
+int bpf_prog_create(struct bpf_prog **pfp, struct sock_fprog_kern *fprog)
 {
 	unsigned int fsize = bpf_classic_proglen(fprog);
-	struct sk_filter *fp;
+	struct bpf_prog *fp;
 
 	/* Make sure new filter is there and in the right amounts. */
 	if (fprog->filter == NULL)
 		return -EINVAL;
 
-	fp = kmalloc(sk_filter_size(fprog->len), GFP_KERNEL);
+	fp = kmalloc(bpf_prog_size(fprog->len), GFP_KERNEL);
 	if (!fp)
 		return -ENOMEM;
 
 	memcpy(fp->insns, fprog->filter, fsize);
 
-	atomic_set(&fp->refcnt, 1);
 	fp->len = fprog->len;
 	/* Since unattached filters are not copied back to user
 	 * space through sk_get_filter(), we do not need to hold
@@ -1022,23 +1026,23 @@ int sk_unattached_filter_create(struct sk_filter **pfp,
 	 */
 	fp->orig_prog = NULL;
 
-	/* __sk_prepare_filter() already takes care of freeing
+	/* bpf_prepare_filter() already takes care of freeing
 	 * memory in case something goes wrong.
 	 */
-	fp = __sk_prepare_filter(fp);
+	fp = bpf_prepare_filter(fp);
 	if (IS_ERR(fp))
 		return PTR_ERR(fp);
 
 	*pfp = fp;
 	return 0;
 }
-EXPORT_SYMBOL_GPL(sk_unattached_filter_create);
+EXPORT_SYMBOL_GPL(bpf_prog_create);
 
-void sk_unattached_filter_destroy(struct sk_filter *fp)
+void bpf_prog_destroy(struct bpf_prog *fp)
 {
-	__sk_filter_release(fp);
+	__bpf_prog_release(fp);
 }
-EXPORT_SYMBOL_GPL(sk_unattached_filter_destroy);
+EXPORT_SYMBOL_GPL(bpf_prog_destroy);
 
 /**
  *	sk_attach_filter - attach a socket filter
@@ -1054,7 +1058,8 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
 {
 	struct sk_filter *fp, *old_fp;
 	unsigned int fsize = bpf_classic_proglen(fprog);
-	unsigned int sk_fsize = sk_filter_size(fprog->len);
+	unsigned int bpf_fsize = bpf_prog_size(fprog->len);
+	struct bpf_prog *prog;
 	int err;
 
 	if (sock_flag(sk, SOCK_FILTER_LOCKED))
@@ -1064,29 +1069,36 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
 	if (fprog->filter == NULL)
 		return -EINVAL;
 
-	fp = kmalloc(sk_fsize, GFP_KERNEL);
-	if (!fp)
+	prog = kmalloc(bpf_fsize, GFP_KERNEL);
+	if (!prog)
 		return -ENOMEM;
 
-	if (copy_from_user(fp->insns, fprog->filter, fsize)) {
-		kfree(fp);
+	if (copy_from_user(prog->insns, fprog->filter, fsize)) {
+		kfree(prog);
 		return -EFAULT;
 	}
 
-	fp->len = fprog->len;
+	prog->len = fprog->len;
 
-	err = sk_store_orig_filter(fp, fprog);
+	err = bpf_prog_store_orig_filter(prog, fprog);
 	if (err) {
-		kfree(fp);
+		kfree(prog);
 		return -ENOMEM;
 	}
 
-	/* __sk_prepare_filter() already takes care of freeing
+	/* bpf_prepare_filter() already takes care of freeing
 	 * memory in case something goes wrong.
 	 */
-	fp = __sk_prepare_filter(fp);
-	if (IS_ERR(fp))
-		return PTR_ERR(fp);
+	prog = bpf_prepare_filter(prog);
+	if (IS_ERR(prog))
+		return PTR_ERR(prog);
+
+	fp = kmalloc(sizeof(*fp), GFP_KERNEL);
+	if (!fp) {
+		__bpf_prog_release(prog);
+		return -ENOMEM;
+	}
+	fp->prog = prog;
 
 	atomic_set(&fp->refcnt, 0);
 
@@ -1142,7 +1154,7 @@ int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf,
 	/* We're copying the filter that has been originally attached,
 	 * so no conversion/decode needed anymore.
 	 */
-	fprog = filter->orig_prog;
+	fprog = filter->prog->orig_prog;
 
 	ret = fprog->len;
 	if (!len)
diff --git a/net/core/ptp_classifier.c b/net/core/ptp_classifier.c
index 12ab7b4be609..4eab4a94a59d 100644
--- a/net/core/ptp_classifier.c
+++ b/net/core/ptp_classifier.c
@@ -107,11 +107,11 @@
 #include <linux/filter.h>
 #include <linux/ptp_classify.h>
 
-static struct sk_filter *ptp_insns __read_mostly;
+static struct bpf_prog *ptp_insns __read_mostly;
 
 unsigned int ptp_classify_raw(const struct sk_buff *skb)
 {
-	return SK_RUN_FILTER(ptp_insns, skb);
+	return BPF_PROG_RUN(ptp_insns, skb);
 }
 EXPORT_SYMBOL_GPL(ptp_classify_raw);
 
@@ -189,5 +189,5 @@ void __init ptp_classifier_init(void)
 		.len = ARRAY_SIZE(ptp_filter), .filter = ptp_filter,
 	};
 
-	BUG_ON(sk_unattached_filter_create(&ptp_insns, &ptp_prog));
+	BUG_ON(bpf_prog_create(&ptp_insns, &ptp_prog));
 }
diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c
index 57d922320c59..ad704c757bb4 100644
--- a/net/core/sock_diag.c
+++ b/net/core/sock_diag.c
@@ -68,7 +68,7 @@ int sock_diag_put_filterinfo(bool may_report_filterinfo, struct sock *sk,
 	if (!filter)
 		goto out;
 
-	fprog = filter->orig_prog;
+	fprog = filter->prog->orig_prog;
 	flen = bpf_classic_proglen(fprog);
 
 	attr = nla_reserve(skb, attrtype, flen);
diff --git a/net/netfilter/xt_bpf.c b/net/netfilter/xt_bpf.c
index bbffdbdaf603..dffee9d47ec4 100644
--- a/net/netfilter/xt_bpf.c
+++ b/net/netfilter/xt_bpf.c
@@ -28,7 +28,7 @@ static int bpf_mt_check(const struct xt_mtchk_param *par)
 	program.len = info->bpf_program_num_elem;
 	program.filter = info->bpf_program;
 
-	if (sk_unattached_filter_create(&info->filter, &program)) {
+	if (bpf_prog_create(&info->filter, &program)) {
 		pr_info("bpf: check failed: parse error\n");
 		return -EINVAL;
 	}
@@ -40,13 +40,13 @@ static bool bpf_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const struct xt_bpf_info *info = par->matchinfo;
 
-	return SK_RUN_FILTER(info->filter, skb);
+	return BPF_PROG_RUN(info->filter, skb);
 }
 
 static void bpf_mt_destroy(const struct xt_mtdtor_param *par)
 {
 	const struct xt_bpf_info *info = par->matchinfo;
-	sk_unattached_filter_destroy(info->filter);
+	bpf_prog_destroy(info->filter);
 }
 
 static struct xt_match bpf_mt_reg __read_mostly = {
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index 13f64df2c710..0e30d58149da 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -30,7 +30,7 @@ struct cls_bpf_head {
 };
 
 struct cls_bpf_prog {
-	struct sk_filter *filter;
+	struct bpf_prog *filter;
 	struct sock_filter *bpf_ops;
 	struct tcf_exts exts;
 	struct tcf_result res;
@@ -54,7 +54,7 @@ static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
 	int ret;
 
 	list_for_each_entry(prog, &head->plist, link) {
-		int filter_res = SK_RUN_FILTER(prog->filter, skb);
+		int filter_res = BPF_PROG_RUN(prog->filter, skb);
 
 		if (filter_res == 0)
 			continue;
@@ -92,7 +92,7 @@ static void cls_bpf_delete_prog(struct tcf_proto *tp, struct cls_bpf_prog *prog)
 	tcf_unbind_filter(tp, &prog->res);
 	tcf_exts_destroy(tp, &prog->exts);
 
-	sk_unattached_filter_destroy(prog->filter);
+	bpf_prog_destroy(prog->filter);
 
 	kfree(prog->bpf_ops);
 	kfree(prog);
@@ -161,7 +161,7 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp,
 	struct sock_filter *bpf_ops, *bpf_old;
 	struct tcf_exts exts;
 	struct sock_fprog_kern tmp;
-	struct sk_filter *fp, *fp_old;
+	struct bpf_prog *fp, *fp_old;
 	u16 bpf_size, bpf_len;
 	u32 classid;
 	int ret;
@@ -193,7 +193,7 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp,
 	tmp.len = bpf_len;
 	tmp.filter = bpf_ops;
 
-	ret = sk_unattached_filter_create(&fp, &tmp);
+	ret = bpf_prog_create(&fp, &tmp);
 	if (ret)
 		goto errout_free;
 
@@ -211,7 +211,7 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp,
 	tcf_exts_change(tp, &prog->exts, &exts);
 
 	if (fp_old)
-		sk_unattached_filter_destroy(fp_old);
+		bpf_prog_destroy(fp_old);
 	if (bpf_old)
 		kfree(bpf_old);
 
-- 
cgit v1.2.3


From 1b69be5e8afc634f39ad695a6ab6aad0cf0975c7 Mon Sep 17 00:00:00 2001
From: Gavin Shan <gwshan@linux.vnet.ibm.com>
Date: Tue, 10 Jun 2014 11:41:57 +1000
Subject: drivers/vfio: EEH support for VFIO PCI device

The patch adds new IOCTL commands for sPAPR VFIO container device
to support EEH functionality for PCI devices, which have been passed
through from host to somebody else via VFIO.

Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
Acked-by: Alexander Graf <agraf@suse.de>
Acked-by: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 Documentation/vfio.txt              | 87 +++++++++++++++++++++++++++++++++++--
 drivers/vfio/Makefile               |  1 +
 drivers/vfio/pci/vfio_pci.c         | 18 ++++++--
 drivers/vfio/vfio_iommu_spapr_tce.c | 17 +++++++-
 drivers/vfio/vfio_spapr_eeh.c       | 87 +++++++++++++++++++++++++++++++++++++
 include/linux/vfio.h                | 23 ++++++++++
 include/uapi/linux/vfio.h           | 34 +++++++++++++++
 7 files changed, 259 insertions(+), 8 deletions(-)
 create mode 100644 drivers/vfio/vfio_spapr_eeh.c

(limited to 'include/uapi/linux')

diff --git a/Documentation/vfio.txt b/Documentation/vfio.txt
index b9ca02370d46..96978eced341 100644
--- a/Documentation/vfio.txt
+++ b/Documentation/vfio.txt
@@ -305,7 +305,15 @@ faster, the map/unmap handling has been implemented in real mode which provides
 an excellent performance which has limitations such as inability to do
 locked pages accounting in real time.
 
-So 3 additional ioctls have been added:
+4) According to sPAPR specification, A Partitionable Endpoint (PE) is an I/O
+subtree that can be treated as a unit for the purposes of partitioning and
+error recovery. A PE may be a single or multi-function IOA (IO Adapter), a
+function of a multi-function IOA, or multiple IOAs (possibly including switch
+and bridge structures above the multiple IOAs). PPC64 guests detect PCI errors
+and recover from them via EEH RTAS services, which works on the basis of
+additional ioctl commands.
+
+So 4 additional ioctls have been added:
 
 	VFIO_IOMMU_SPAPR_TCE_GET_INFO - returns the size and the start
 		of the DMA window on the PCI bus.
@@ -316,9 +324,12 @@ So 3 additional ioctls have been added:
 
 	VFIO_IOMMU_DISABLE - disables the container.
 
+	VFIO_EEH_PE_OP - provides an API for EEH setup, error detection and recovery.
 
 The code flow from the example above should be slightly changed:
 
+	struct vfio_eeh_pe_op pe_op = { .argsz = sizeof(pe_op), .flags = 0 };
+
 	.....
 	/* Add the group to the container */
 	ioctl(group, VFIO_GROUP_SET_CONTAINER, &container);
@@ -342,9 +353,79 @@ The code flow from the example above should be slightly changed:
 	dma_map.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE;
 
 	/* Check here is .iova/.size are within DMA window from spapr_iommu_info */
-
 	ioctl(container, VFIO_IOMMU_MAP_DMA, &dma_map);
-	.....
+
+	/* Get a file descriptor for the device */
+	device = ioctl(group, VFIO_GROUP_GET_DEVICE_FD, "0000:06:0d.0");
+
+	....
+
+	/* Gratuitous device reset and go... */
+	ioctl(device, VFIO_DEVICE_RESET);
+
+	/* Make sure EEH is supported */
+	ioctl(container, VFIO_CHECK_EXTENSION, VFIO_EEH);
+
+	/* Enable the EEH functionality on the device */
+	pe_op.op = VFIO_EEH_PE_ENABLE;
+	ioctl(container, VFIO_EEH_PE_OP, &pe_op);
+
+	/* You're suggested to create additional data struct to represent
+	 * PE, and put child devices belonging to same IOMMU group to the
+	 * PE instance for later reference.
+	 */
+
+	/* Check the PE's state and make sure it's in functional state */
+	pe_op.op = VFIO_EEH_PE_GET_STATE;
+	ioctl(container, VFIO_EEH_PE_OP, &pe_op);
+
+	/* Save device state using pci_save_state().
+	 * EEH should be enabled on the specified device.
+	 */
+
+	....
+
+	/* When 0xFF's returned from reading PCI config space or IO BARs
+	 * of the PCI device. Check the PE's state to see if that has been
+	 * frozen.
+	 */
+	ioctl(container, VFIO_EEH_PE_OP, &pe_op);
+
+	/* Waiting for pending PCI transactions to be completed and don't
+	 * produce any more PCI traffic from/to the affected PE until
+	 * recovery is finished.
+	 */
+
+	/* Enable IO for the affected PE and collect logs. Usually, the
+	 * standard part of PCI config space, AER registers are dumped
+	 * as logs for further analysis.
+	 */
+	pe_op.op = VFIO_EEH_PE_UNFREEZE_IO;
+	ioctl(container, VFIO_EEH_PE_OP, &pe_op);
+
+	/*
+	 * Issue PE reset: hot or fundamental reset. Usually, hot reset
+	 * is enough. However, the firmware of some PCI adapters would
+	 * require fundamental reset.
+	 */
+	pe_op.op = VFIO_EEH_PE_RESET_HOT;
+	ioctl(container, VFIO_EEH_PE_OP, &pe_op);
+	pe_op.op = VFIO_EEH_PE_RESET_DEACTIVATE;
+	ioctl(container, VFIO_EEH_PE_OP, &pe_op);
+
+	/* Configure the PCI bridges for the affected PE */
+	pe_op.op = VFIO_EEH_PE_CONFIGURE;
+	ioctl(container, VFIO_EEH_PE_OP, &pe_op);
+
+	/* Restored state we saved at initialization time. pci_restore_state()
+	 * is good enough as an example.
+	 */
+
+	/* Hopefully, error is recovered successfully. Now, you can resume to
+	 * start PCI traffic to/from the affected PE.
+	 */
+
+	....
 
 -------------------------------------------------------------------------------
 
diff --git a/drivers/vfio/Makefile b/drivers/vfio/Makefile
index 72bfabc8629e..50e30bc75e85 100644
--- a/drivers/vfio/Makefile
+++ b/drivers/vfio/Makefile
@@ -1,4 +1,5 @@
 obj-$(CONFIG_VFIO) += vfio.o
 obj-$(CONFIG_VFIO_IOMMU_TYPE1) += vfio_iommu_type1.o
 obj-$(CONFIG_VFIO_IOMMU_SPAPR_TCE) += vfio_iommu_spapr_tce.o
+obj-$(CONFIG_EEH) += vfio_spapr_eeh.o
 obj-$(CONFIG_VFIO_PCI) += pci/
diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index 010e0f8b8e4f..e2ee80f36e3e 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -157,8 +157,10 @@ static void vfio_pci_release(void *device_data)
 {
 	struct vfio_pci_device *vdev = device_data;
 
-	if (atomic_dec_and_test(&vdev->refcnt))
+	if (atomic_dec_and_test(&vdev->refcnt)) {
+		vfio_spapr_pci_eeh_release(vdev->pdev);
 		vfio_pci_disable(vdev);
+	}
 
 	module_put(THIS_MODULE);
 }
@@ -166,19 +168,27 @@ static void vfio_pci_release(void *device_data)
 static int vfio_pci_open(void *device_data)
 {
 	struct vfio_pci_device *vdev = device_data;
+	int ret;
 
 	if (!try_module_get(THIS_MODULE))
 		return -ENODEV;
 
 	if (atomic_inc_return(&vdev->refcnt) == 1) {
-		int ret = vfio_pci_enable(vdev);
+		ret = vfio_pci_enable(vdev);
+		if (ret)
+			goto error;
+
+		ret = vfio_spapr_pci_eeh_open(vdev->pdev);
 		if (ret) {
-			module_put(THIS_MODULE);
-			return ret;
+			vfio_pci_disable(vdev);
+			goto error;
 		}
 	}
 
 	return 0;
+error:
+	module_put(THIS_MODULE);
+	return ret;
 }
 
 static int vfio_pci_get_irq_count(struct vfio_pci_device *vdev, int irq_type)
diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c
index a84788ba662c..730b4ef3e0cc 100644
--- a/drivers/vfio/vfio_iommu_spapr_tce.c
+++ b/drivers/vfio/vfio_iommu_spapr_tce.c
@@ -156,7 +156,16 @@ static long tce_iommu_ioctl(void *iommu_data,
 
 	switch (cmd) {
 	case VFIO_CHECK_EXTENSION:
-		return (arg == VFIO_SPAPR_TCE_IOMMU) ? 1 : 0;
+		switch (arg) {
+		case VFIO_SPAPR_TCE_IOMMU:
+			ret = 1;
+			break;
+		default:
+			ret = vfio_spapr_iommu_eeh_ioctl(NULL, cmd, arg);
+			break;
+		}
+
+		return (ret < 0) ? 0 : ret;
 
 	case VFIO_IOMMU_SPAPR_TCE_GET_INFO: {
 		struct vfio_iommu_spapr_tce_info info;
@@ -283,6 +292,12 @@ static long tce_iommu_ioctl(void *iommu_data,
 		tce_iommu_disable(container);
 		mutex_unlock(&container->lock);
 		return 0;
+	case VFIO_EEH_PE_OP:
+		if (!container->tbl || !container->tbl->it_group)
+			return -ENODEV;
+
+		return vfio_spapr_iommu_eeh_ioctl(container->tbl->it_group,
+						  cmd, arg);
 	}
 
 	return -ENOTTY;
diff --git a/drivers/vfio/vfio_spapr_eeh.c b/drivers/vfio/vfio_spapr_eeh.c
new file mode 100644
index 000000000000..f834b4ce1431
--- /dev/null
+++ b/drivers/vfio/vfio_spapr_eeh.c
@@ -0,0 +1,87 @@
+/*
+ * EEH functionality support for VFIO devices. The feature is only
+ * available on sPAPR compatible platforms.
+ *
+ * Copyright Gavin Shan, IBM Corporation 2014.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/uaccess.h>
+#include <linux/vfio.h>
+#include <asm/eeh.h>
+
+/* We might build address mapping here for "fast" path later */
+int vfio_spapr_pci_eeh_open(struct pci_dev *pdev)
+{
+	return eeh_dev_open(pdev);
+}
+
+void vfio_spapr_pci_eeh_release(struct pci_dev *pdev)
+{
+	eeh_dev_release(pdev);
+}
+
+long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group,
+				unsigned int cmd, unsigned long arg)
+{
+	struct eeh_pe *pe;
+	struct vfio_eeh_pe_op op;
+	unsigned long minsz;
+	long ret = -EINVAL;
+
+	switch (cmd) {
+	case VFIO_CHECK_EXTENSION:
+		if (arg == VFIO_EEH)
+			ret = eeh_enabled() ? 1 : 0;
+		else
+			ret = 0;
+		break;
+	case VFIO_EEH_PE_OP:
+		pe = eeh_iommu_group_to_pe(group);
+		if (!pe)
+			return -ENODEV;
+
+		minsz = offsetofend(struct vfio_eeh_pe_op, op);
+		if (copy_from_user(&op, (void __user *)arg, minsz))
+			return -EFAULT;
+		if (op.argsz < minsz || op.flags)
+			return -EINVAL;
+
+		switch (op.op) {
+		case VFIO_EEH_PE_DISABLE:
+			ret = eeh_pe_set_option(pe, EEH_OPT_DISABLE);
+			break;
+		case VFIO_EEH_PE_ENABLE:
+			ret = eeh_pe_set_option(pe, EEH_OPT_ENABLE);
+			break;
+		case VFIO_EEH_PE_UNFREEZE_IO:
+			ret = eeh_pe_set_option(pe, EEH_OPT_THAW_MMIO);
+			break;
+		case VFIO_EEH_PE_UNFREEZE_DMA:
+			ret = eeh_pe_set_option(pe, EEH_OPT_THAW_DMA);
+			break;
+		case VFIO_EEH_PE_GET_STATE:
+			ret = eeh_pe_get_state(pe);
+			break;
+		case VFIO_EEH_PE_RESET_DEACTIVATE:
+			ret = eeh_pe_reset(pe, EEH_RESET_DEACTIVATE);
+			break;
+		case VFIO_EEH_PE_RESET_HOT:
+			ret = eeh_pe_reset(pe, EEH_RESET_HOT);
+			break;
+		case VFIO_EEH_PE_RESET_FUNDAMENTAL:
+			ret = eeh_pe_reset(pe, EEH_RESET_FUNDAMENTAL);
+			break;
+		case VFIO_EEH_PE_CONFIGURE:
+			ret = eeh_pe_configure(pe);
+			break;
+		default:
+			ret = -EINVAL;
+		}
+	}
+
+	return ret;
+}
diff --git a/include/linux/vfio.h b/include/linux/vfio.h
index 8ec980b5e3af..25a0fbd4b998 100644
--- a/include/linux/vfio.h
+++ b/include/linux/vfio.h
@@ -98,4 +98,27 @@ extern int vfio_external_user_iommu_id(struct vfio_group *group);
 extern long vfio_external_check_extension(struct vfio_group *group,
 					  unsigned long arg);
 
+#ifdef CONFIG_EEH
+extern int vfio_spapr_pci_eeh_open(struct pci_dev *pdev);
+extern void vfio_spapr_pci_eeh_release(struct pci_dev *pdev);
+extern long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group,
+				       unsigned int cmd,
+				       unsigned long arg);
+#else
+static inline int vfio_spapr_pci_eeh_open(struct pci_dev *pdev)
+{
+	return 0;
+}
+
+static inline void vfio_spapr_pci_eeh_release(struct pci_dev *pdev)
+{
+}
+
+static inline long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group,
+					      unsigned int cmd,
+					      unsigned long arg)
+{
+	return -ENOTTY;
+}
+#endif /* CONFIG_EEH */
 #endif /* VFIO_H */
diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index cb9023d4f063..6612974c64bf 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -30,6 +30,9 @@
  */
 #define VFIO_DMA_CC_IOMMU		4
 
+/* Check if EEH is supported */
+#define VFIO_EEH			5
+
 /*
  * The IOCTL interface is designed for extensibility by embedding the
  * structure length (argsz) and flags into structures passed between
@@ -455,6 +458,37 @@ struct vfio_iommu_spapr_tce_info {
 
 #define VFIO_IOMMU_SPAPR_TCE_GET_INFO	_IO(VFIO_TYPE, VFIO_BASE + 12)
 
+/*
+ * EEH PE operation struct provides ways to:
+ * - enable/disable EEH functionality;
+ * - unfreeze IO/DMA for frozen PE;
+ * - read PE state;
+ * - reset PE;
+ * - configure PE.
+ */
+struct vfio_eeh_pe_op {
+	__u32 argsz;
+	__u32 flags;
+	__u32 op;
+};
+
+#define VFIO_EEH_PE_DISABLE		0	/* Disable EEH functionality */
+#define VFIO_EEH_PE_ENABLE		1	/* Enable EEH functionality  */
+#define VFIO_EEH_PE_UNFREEZE_IO		2	/* Enable IO for frozen PE   */
+#define VFIO_EEH_PE_UNFREEZE_DMA	3	/* Enable DMA for frozen PE  */
+#define VFIO_EEH_PE_GET_STATE		4	/* PE state retrieval        */
+#define  VFIO_EEH_PE_STATE_NORMAL	0	/* PE in functional state    */
+#define  VFIO_EEH_PE_STATE_RESET	1	/* PE reset in progress      */
+#define  VFIO_EEH_PE_STATE_STOPPED	2	/* Stopped DMA and IO        */
+#define  VFIO_EEH_PE_STATE_STOPPED_DMA	4	/* Stopped DMA only          */
+#define  VFIO_EEH_PE_STATE_UNAVAIL	5	/* State unavailable         */
+#define VFIO_EEH_PE_RESET_DEACTIVATE	5	/* Deassert PE reset         */
+#define VFIO_EEH_PE_RESET_HOT		6	/* Assert hot reset          */
+#define VFIO_EEH_PE_RESET_FUNDAMENTAL	7	/* Assert fundamental reset  */
+#define VFIO_EEH_PE_CONFIGURE		8	/* PE configuration          */
+
+#define VFIO_EEH_PE_OP			_IO(VFIO_TYPE, VFIO_BASE + 21)
+
 /* ***************************************************************** */
 
 #endif /* _UAPIVFIO_H */
-- 
cgit v1.2.3


From c6e9d6f38894798696f23c8084ca7edbf16ee895 Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Thu, 17 Jul 2014 04:13:05 -0400
Subject: random: introduce getrandom(2) system call

The getrandom(2) system call was requested by the LibreSSL Portable
developers.  It is analoguous to the getentropy(2) system call in
OpenBSD.

The rationale of this system call is to provide resiliance against
file descriptor exhaustion attacks, where the attacker consumes all
available file descriptors, forcing the use of the fallback code where
/dev/[u]random is not available.  Since the fallback code is often not
well-tested, it is better to eliminate this potential failure mode
entirely.

The other feature provided by this new system call is the ability to
request randomness from the /dev/urandom entropy pool, but to block
until at least 128 bits of entropy has been accumulated in the
/dev/urandom entropy pool.  Historically, the emphasis in the
/dev/urandom development has been to ensure that urandom pool is
initialized as quickly as possible after system boot, and preferably
before the init scripts start execution.

This is because changing /dev/urandom reads to block represents an
interface change that could potentially break userspace which is not
acceptable.  In practice, on most x86 desktop and server systems, in
general the entropy pool can be initialized before it is needed (and
in modern kernels, we will printk a warning message if not).  However,
on an embedded system, this may not be the case.  And so with this new
interface, we can provide the functionality of blocking until the
urandom pool has been initialized.  Any userspace program which uses
this new functionality must take care to assure that if it is used
during the boot process, that it will not cause the init scripts or
other portions of the system startup to hang indefinitely.

SYNOPSIS
	#include <linux/random.h>

	int getrandom(void *buf, size_t buflen, unsigned int flags);

DESCRIPTION
	The system call getrandom() fills the buffer pointed to by buf
	with up to buflen random bytes which can be used to seed user
	space random number generators (i.e., DRBG's) or for other
	cryptographic uses.  It should not be used for Monte Carlo
	simulations or other programs/algorithms which are doing
	probabilistic sampling.

	If the GRND_RANDOM flags bit is set, then draw from the
	/dev/random pool instead of the /dev/urandom pool.  The
	/dev/random pool is limited based on the entropy that can be
	obtained from environmental noise, so if there is insufficient
	entropy, the requested number of bytes may not be returned.
	If there is no entropy available at all, getrandom(2) will
	either block, or return an error with errno set to EAGAIN if
	the GRND_NONBLOCK bit is set in flags.

	If the GRND_RANDOM bit is not set, then the /dev/urandom pool
	will be used.  Unlike using read(2) to fetch data from
	/dev/urandom, if the urandom pool has not been sufficiently
	initialized, getrandom(2) will block (or return -1 with the
	errno set to EAGAIN if the GRND_NONBLOCK bit is set in flags).

	The getentropy(2) system call in OpenBSD can be emulated using
	the following function:

            int getentropy(void *buf, size_t buflen)
            {
                    int     ret;

                    if (buflen > 256)
                            goto failure;
                    ret = getrandom(buf, buflen, 0);
                    if (ret < 0)
                            return ret;
                    if (ret == buflen)
                            return 0;
            failure:
                    errno = EIO;
                    return -1;
            }

RETURN VALUE
       On success, the number of bytes that was filled in the buf is
       returned.  This may not be all the bytes requested by the
       caller via buflen if insufficient entropy was present in the
       /dev/random pool, or if the system call was interrupted by a
       signal.

       On error, -1 is returned, and errno is set appropriately.

ERRORS
	EINVAL		An invalid flag was passed to getrandom(2)

	EFAULT		buf is outside the accessible address space.

	EAGAIN		The requested entropy was not available, and
			getentropy(2) would have blocked if the
			GRND_NONBLOCK flag was not set.

	EINTR		While blocked waiting for entropy, the call was
			interrupted by a signal handler; see the description
			of how interrupted read(2) calls on "slow" devices
			are handled with and without the SA_RESTART flag
			in the signal(7) man page.

NOTES
	For small requests (buflen <= 256) getrandom(2) will not
	return EINTR when reading from the urandom pool once the
	entropy pool has been initialized, and it will return all of
	the bytes that have been requested.  This is the recommended
	way to use getrandom(2), and is designed for compatibility
	with OpenBSD's getentropy() system call.

	However, if you are using GRND_RANDOM, then getrandom(2) may
	block until the entropy accounting determines that sufficient
	environmental noise has been gathered such that getrandom(2)
	will be operating as a NRBG instead of a DRBG for those people
	who are working in the NIST SP 800-90 regime.  Since it may
	block for a long time, these guarantees do *not* apply.  The
	user may want to interrupt a hanging process using a signal,
	so blocking until all of the requested bytes are returned
	would be unfriendly.

	For this reason, the user of getrandom(2) MUST always check
	the return value, in case it returns some error, or if fewer
	bytes than requested was returned.  In the case of
	!GRND_RANDOM and small request, the latter should never
	happen, but the careful userspace code (and all crypto code
	should be careful) should check for this anyway!

	Finally, unless you are doing long-term key generation (and
	perhaps not even then), you probably shouldn't be using
	GRND_RANDOM.  The cryptographic algorithms used for
	/dev/urandom are quite conservative, and so should be
	sufficient for all purposes.  The disadvantage of GRND_RANDOM
	is that it can block, and the increased complexity required to
	deal with partially fulfilled getrandom(2) requests.

Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Reviewed-by: Zach Brown <zab@zabbo.net>
---
 arch/x86/syscalls/syscall_32.tbl  |  1 +
 arch/x86/syscalls/syscall_64.tbl  |  1 +
 drivers/char/random.c             | 40 ++++++++++++++++++++++++++++++++++++---
 include/linux/syscalls.h          |  3 +++
 include/uapi/asm-generic/unistd.h |  4 +++-
 include/uapi/linux/random.h       |  9 +++++++++
 6 files changed, 54 insertions(+), 4 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl
index d6b867921612..5b46a618aeb1 100644
--- a/arch/x86/syscalls/syscall_32.tbl
+++ b/arch/x86/syscalls/syscall_32.tbl
@@ -360,3 +360,4 @@
 351	i386	sched_setattr		sys_sched_setattr
 352	i386	sched_getattr		sys_sched_getattr
 353	i386	renameat2		sys_renameat2
+355	i386	getrandom		sys_getrandom
diff --git a/arch/x86/syscalls/syscall_64.tbl b/arch/x86/syscalls/syscall_64.tbl
index ec255a1646d2..0dc4bf891460 100644
--- a/arch/x86/syscalls/syscall_64.tbl
+++ b/arch/x86/syscalls/syscall_64.tbl
@@ -323,6 +323,7 @@
 314	common	sched_setattr		sys_sched_setattr
 315	common	sched_getattr		sys_sched_getattr
 316	common	renameat2		sys_renameat2
+318	common	getrandom		sys_getrandom
 
 #
 # x32-specific system call numbers start at 512 to avoid cache impact
diff --git a/drivers/char/random.c b/drivers/char/random.c
index aa22fe551c2a..7d1682ea1e86 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -258,6 +258,8 @@
 #include <linux/kmemcheck.h>
 #include <linux/workqueue.h>
 #include <linux/irq.h>
+#include <linux/syscalls.h>
+#include <linux/completion.h>
 
 #include <asm/processor.h>
 #include <asm/uaccess.h>
@@ -404,6 +406,7 @@ static struct poolinfo {
  */
 static DECLARE_WAIT_QUEUE_HEAD(random_read_wait);
 static DECLARE_WAIT_QUEUE_HEAD(random_write_wait);
+static DECLARE_WAIT_QUEUE_HEAD(urandom_init_wait);
 static struct fasync_struct *fasync;
 
 /**********************************************************************
@@ -657,6 +660,7 @@ retry:
 		r->entropy_total = 0;
 		if (r == &nonblocking_pool) {
 			prandom_reseed_late();
+			wake_up_interruptible(&urandom_init_wait);
 			pr_notice("random: %s pool is initialized\n", r->name);
 		}
 	}
@@ -1174,13 +1178,14 @@ static ssize_t extract_entropy_user(struct entropy_store *r, void __user *buf,
 {
 	ssize_t ret = 0, i;
 	__u8 tmp[EXTRACT_SIZE];
+	int large_request = (nbytes > 256);
 
 	trace_extract_entropy_user(r->name, nbytes, ENTROPY_BITS(r), _RET_IP_);
 	xfer_secondary_pool(r, nbytes);
 	nbytes = account(r, nbytes, 0, 0);
 
 	while (nbytes) {
-		if (need_resched()) {
+		if (large_request && need_resched()) {
 			if (signal_pending(current)) {
 				if (ret == 0)
 					ret = -ERESTARTSYS;
@@ -1355,7 +1360,7 @@ static int arch_random_refill(void)
 }
 
 static ssize_t
-random_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos)
+_random_read(int nonblock, char __user *buf, size_t nbytes)
 {
 	ssize_t n;
 
@@ -1379,7 +1384,7 @@ random_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos)
 		if (arch_random_refill())
 			continue;
 
-		if (file->f_flags & O_NONBLOCK)
+		if (nonblock)
 			return -EAGAIN;
 
 		wait_event_interruptible(random_read_wait,
@@ -1390,6 +1395,12 @@ random_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos)
 	}
 }
 
+static ssize_t
+random_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos)
+{
+	return _random_read(file->f_flags & O_NONBLOCK, buf, nbytes);
+}
+
 static ssize_t
 urandom_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos)
 {
@@ -1533,6 +1544,29 @@ const struct file_operations urandom_fops = {
 	.llseek = noop_llseek,
 };
 
+SYSCALL_DEFINE3(getrandom, char __user *, buf, size_t, count,
+		unsigned int, flags)
+{
+	if (flags & ~(GRND_NONBLOCK|GRND_RANDOM))
+		return -EINVAL;
+
+	if (count > INT_MAX)
+		count = INT_MAX;
+
+	if (flags & GRND_RANDOM)
+		return _random_read(flags & GRND_NONBLOCK, buf, count);
+
+	if (unlikely(nonblocking_pool.initialized == 0)) {
+		if (flags & GRND_NONBLOCK)
+			return -EAGAIN;
+		wait_event_interruptible(urandom_init_wait,
+					 nonblocking_pool.initialized);
+		if (signal_pending(current))
+			return -ERESTARTSYS;
+	}
+	return urandom_read(NULL, buf, count, NULL);
+}
+
 /***************************************************************
  * Random UUID interface
  *
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index b0881a0ed322..43324a897cf2 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -866,4 +866,7 @@ asmlinkage long sys_process_vm_writev(pid_t pid,
 asmlinkage long sys_kcmp(pid_t pid1, pid_t pid2, int type,
 			 unsigned long idx1, unsigned long idx2);
 asmlinkage long sys_finit_module(int fd, const char __user *uargs, int flags);
+asmlinkage long sys_getrandom(char __user *buf, size_t count,
+			      unsigned int flags);
+
 #endif
diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
index 333640608087..1d104a2ca643 100644
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -699,9 +699,11 @@ __SYSCALL(__NR_sched_setattr, sys_sched_setattr)
 __SYSCALL(__NR_sched_getattr, sys_sched_getattr)
 #define __NR_renameat2 276
 __SYSCALL(__NR_renameat2, sys_renameat2)
+#define __NR_getrandom 278
+__SYSCALL(__NR_getrandom, sys_getrandom)
 
 #undef __NR_syscalls
-#define __NR_syscalls 277
+#define __NR_syscalls 279
 
 /*
  * All syscalls below here should go away really,
diff --git a/include/uapi/linux/random.h b/include/uapi/linux/random.h
index fff3528a078f..3f93d1695e7f 100644
--- a/include/uapi/linux/random.h
+++ b/include/uapi/linux/random.h
@@ -40,4 +40,13 @@ struct rand_pool_info {
 	__u32	buf[0];
 };
 
+/*
+ * Flags for getrandom(2)
+ *
+ * GRND_NONBLOCK	Don't block and return EAGAIN instead
+ * GRND_RANDOM		Use the /dev/random pool instead of /dev/urandom
+ */
+#define GRND_NONBLOCK	0x0001
+#define GRND_RANDOM	0x0002
+
 #endif /* _UAPI_LINUX_RANDOM_H */
-- 
cgit v1.2.3


From f24b9be5957b38bb420b838115040dc2031b7d0c Mon Sep 17 00:00:00 2001
From: Willem de Bruijn <willemb@google.com>
Date: Mon, 4 Aug 2014 22:11:45 -0400
Subject: net-timestamp: extend SCM_TIMESTAMPING ancillary data struct

Applications that request kernel tx timestamps with SO_TIMESTAMPING
read timestamps as recvmsg() ancillary data. The response is defined
implicitly as timespec[3].

1) define struct scm_timestamping explicitly and

2) add support for new tstamp types. On tx, scm_timestamping always
   accompanies a sock_extended_err. Define previously unused field
   ee_info to signal the type of ts[0]. Introduce SCM_TSTAMP_SND to
   define the existing behavior.

The reception path is not modified. On rx, no struct similar to
sock_extended_err is passed along with SCM_TIMESTAMPING.

Signed-off-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h        |  3 +++
 include/net/sock.h            |  4 +++-
 include/uapi/linux/errqueue.h | 18 ++++++++++++++++++
 net/core/skbuff.c             |  1 +
 net/socket.c                  | 20 +++++++++++---------
 5 files changed, 36 insertions(+), 10 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 281deced7469..477f0f60db45 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -249,6 +249,9 @@ enum {
 	SKBTX_SHARED_FRAG = 1 << 5,
 };
 
+#define SKBTX_ANY_SW_TSTAMP	SKBTX_SW_TSTAMP
+#define SKBTX_ANY_TSTAMP	(SKBTX_HW_TSTAMP | SKBTX_ANY_SW_TSTAMP)
+
 /*
  * The callback notifies userspace to release buffers when skb DMA is done in
  * lower device, the skb last reference should be 0 when calling this.
diff --git a/include/net/sock.h b/include/net/sock.h
index b91c8868ab8d..02f5b35e65f1 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -2169,7 +2169,9 @@ sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb)
 	 */
 	if (sock_flag(sk, SOCK_RCVTSTAMP) ||
 	    sock_flag(sk, SOCK_TIMESTAMPING_RX_SOFTWARE) ||
-	    (kt.tv64 && sock_flag(sk, SOCK_TIMESTAMPING_SOFTWARE)) ||
+	    (kt.tv64 &&
+	     (sock_flag(sk, SOCK_TIMESTAMPING_SOFTWARE) ||
+	      skb_shinfo(skb)->tx_flags & SKBTX_ANY_SW_TSTAMP)) ||
 	    (hwtstamps->hwtstamp.tv64 &&
 	     sock_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE)))
 		__sock_recv_timestamp(msg, sk, skb);
diff --git a/include/uapi/linux/errqueue.h b/include/uapi/linux/errqueue.h
index aacd4fb7102a..accee72cae7c 100644
--- a/include/uapi/linux/errqueue.h
+++ b/include/uapi/linux/errqueue.h
@@ -22,5 +22,23 @@ struct sock_extended_err {
 
 #define SO_EE_OFFENDER(ee)	((struct sockaddr*)((ee)+1))
 
+/**
+ *	struct scm_timestamping - timestamps exposed through cmsg
+ *
+ *	The timestamping interfaces SO_TIMESTAMPING, MSG_TSTAMP_*
+ *	communicate network timestamps by passing this struct in a cmsg with
+ *	recvmsg(). See Documentation/networking/timestamping.txt for details.
+ */
+struct scm_timestamping {
+	struct timespec ts[3];
+};
+
+/* The type of scm_timestamping, passed in sock_extended_err ee_info.
+ * This defines the type of ts[0]. For SCM_TSTAMP_SND only, if ts[0]
+ * is zero, then this is a hardware timestamp and recorded in ts[2].
+ */
+enum {
+	SCM_TSTAMP_SND,		/* driver passed skb to NIC, or HW */
+};
 
 #endif /* _UAPI_LINUX_ERRQUEUE_H */
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index c1a33033cbe2..c9f68802e992 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -3521,6 +3521,7 @@ void skb_tstamp_tx(struct sk_buff *orig_skb,
 	memset(serr, 0, sizeof(*serr));
 	serr->ee.ee_errno = ENOMSG;
 	serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING;
+	serr->ee.ee_info = SCM_TSTAMP_SND;
 
 	err = sock_queue_err_skb(sk, skb);
 
diff --git a/net/socket.c b/net/socket.c
index d8222c025061..dc0cc5d95ee5 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -106,6 +106,7 @@
 #include <linux/sockios.h>
 #include <linux/atalk.h>
 #include <net/busy_poll.h>
+#include <linux/errqueue.h>
 
 #ifdef CONFIG_NET_RX_BUSY_POLL
 unsigned int sysctl_net_busy_read __read_mostly;
@@ -697,7 +698,7 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
 	struct sk_buff *skb)
 {
 	int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP);
-	struct timespec ts[3];
+	struct scm_timestamping tss;
 	int empty = 1;
 	struct skb_shared_hwtstamps *shhwtstamps =
 		skb_hwtstamps(skb);
@@ -714,24 +715,25 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
 			put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP,
 				 sizeof(tv), &tv);
 		} else {
-			skb_get_timestampns(skb, &ts[0]);
+			struct timespec ts;
+			skb_get_timestampns(skb, &ts);
 			put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS,
-				 sizeof(ts[0]), &ts[0]);
+				 sizeof(ts), &ts);
 		}
 	}
 
-
-	memset(ts, 0, sizeof(ts));
-	if (sock_flag(sk, SOCK_TIMESTAMPING_SOFTWARE) &&
-	    ktime_to_timespec_cond(skb->tstamp, ts + 0))
+	memset(&tss, 0, sizeof(tss));
+	if ((sock_flag(sk, SOCK_TIMESTAMPING_SOFTWARE) ||
+	     skb_shinfo(skb)->tx_flags & SKBTX_ANY_SW_TSTAMP) &&
+	    ktime_to_timespec_cond(skb->tstamp, tss.ts + 0))
 		empty = 0;
 	if (shhwtstamps &&
 	    sock_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE) &&
-	    ktime_to_timespec_cond(shhwtstamps->hwtstamp, ts + 2))
+	    ktime_to_timespec_cond(shhwtstamps->hwtstamp, tss.ts + 2))
 		empty = 0;
 	if (!empty)
 		put_cmsg(msg, SOL_SOCKET,
-			 SCM_TIMESTAMPING, sizeof(ts), &ts);
+			 SCM_TIMESTAMPING, sizeof(tss), &tss);
 }
 EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
 
-- 
cgit v1.2.3


From 09c2d251b70723650ba47e83571ff49281320f7c Mon Sep 17 00:00:00 2001
From: Willem de Bruijn <willemb@google.com>
Date: Mon, 4 Aug 2014 22:11:47 -0400
Subject: net-timestamp: add key to disambiguate concurrent datagrams

Datagrams timestamped on transmission can coexist in the kernel stack
and be reordered in packet scheduling. When reading looped datagrams
from the socket error queue it is not always possible to unique
correlate looped data with original send() call (for application
level retransmits). Even if possible, it may be expensive and complex,
requiring packet inspection.

Introduce a data-independent ID mechanism to associate timestamps with
send calls. Pass an ID alongside the timestamp in field ee_data of
sock_extended_err.

The ID is a simple 32 bit unsigned int that is associated with the
socket and incremented on each send() call for which software tx
timestamp generation is enabled.

The feature is enabled only if SOF_TIMESTAMPING_OPT_ID is set, to
avoid changing ee_data for existing applications that expect it 0.
The counter is reset each time the flag is reenabled. Reenabling
does not change the ID of already submitted data. It is possible
to receive out of order IDs if the timestamp stream is not quiesced
first.

Signed-off-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h          | 1 +
 include/net/sock.h              | 2 ++
 include/uapi/linux/net_tstamp.h | 8 +++++---
 net/core/skbuff.c               | 2 ++
 net/core/sock.c                 | 3 +++
 net/ipv4/ip_output.c            | 6 ++++++
 net/ipv6/ip6_output.c           | 9 ++++++++-
 7 files changed, 27 insertions(+), 4 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 477f0f60db45..0e35b3af7317 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -278,6 +278,7 @@ struct skb_shared_info {
 	unsigned short  gso_type;
 	struct sk_buff	*frag_list;
 	struct skb_shared_hwtstamps hwtstamps;
+	u32		tskey;
 	__be32          ip6_frag_id;
 
 	/*
diff --git a/include/net/sock.h b/include/net/sock.h
index a21129716aae..52fe0bc5598a 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -280,6 +280,7 @@ struct cg_proto;
   *	@sk_timer: sock cleanup timer
   *	@sk_stamp: time stamp of last packet received
   *	@sk_tsflags: SO_TIMESTAMPING socket options
+  *	@sk_tskey: counter to disambiguate concurrent tstamp requests
   *	@sk_socket: Identd and reporting IO signals
   *	@sk_user_data: RPC layer private data
   *	@sk_frag: cached page frag
@@ -414,6 +415,7 @@ struct sock {
 	struct timer_list	sk_timer;
 	ktime_t			sk_stamp;
 	u16			sk_tsflags;
+	u32			sk_tskey;
 	struct socket		*sk_socket;
 	void			*sk_user_data;
 	struct page_frag	sk_frag;
diff --git a/include/uapi/linux/net_tstamp.h b/include/uapi/linux/net_tstamp.h
index f53879c0f590..1e861d2e1a31 100644
--- a/include/uapi/linux/net_tstamp.h
+++ b/include/uapi/linux/net_tstamp.h
@@ -20,9 +20,11 @@ enum {
 	SOF_TIMESTAMPING_SOFTWARE = (1<<4),
 	SOF_TIMESTAMPING_SYS_HARDWARE = (1<<5),
 	SOF_TIMESTAMPING_RAW_HARDWARE = (1<<6),
-	SOF_TIMESTAMPING_MASK =
-	(SOF_TIMESTAMPING_RAW_HARDWARE - 1) |
-	SOF_TIMESTAMPING_RAW_HARDWARE
+	SOF_TIMESTAMPING_OPT_ID = (1<<7),
+
+	SOF_TIMESTAMPING_LAST = SOF_TIMESTAMPING_OPT_ID,
+	SOF_TIMESTAMPING_MASK = (SOF_TIMESTAMPING_LAST - 1) |
+				 SOF_TIMESTAMPING_LAST
 };
 
 /**
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index c9f68802e992..0df4f1d14c5a 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -3522,6 +3522,8 @@ void skb_tstamp_tx(struct sk_buff *orig_skb,
 	serr->ee.ee_errno = ENOMSG;
 	serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING;
 	serr->ee.ee_info = SCM_TSTAMP_SND;
+	if (sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
+		serr->ee.ee_data = skb_shinfo(skb)->tskey;
 
 	err = sock_queue_err_skb(sk, skb);
 
diff --git a/net/core/sock.c b/net/core/sock.c
index 47c9377e14b9..1e0f1c63ad6b 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -848,6 +848,9 @@ set_rcvbuf:
 			ret = -EINVAL;
 			break;
 		}
+		if (val & SOF_TIMESTAMPING_OPT_ID &&
+		    !(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID))
+			sk->sk_tskey = 0;
 		sk->sk_tsflags = val;
 		if (val & SOF_TIMESTAMPING_RX_SOFTWARE)
 			sock_enable_timestamp(sk,
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index b16556836d66..215af2b155cb 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -855,11 +855,15 @@ static int __ip_append_data(struct sock *sk,
 	unsigned int maxfraglen, fragheaderlen, maxnonfragsize;
 	int csummode = CHECKSUM_NONE;
 	struct rtable *rt = (struct rtable *)cork->dst;
+	u32 tskey = 0;
 
 	skb = skb_peek_tail(queue);
 
 	exthdrlen = !skb ? rt->dst.header_len : 0;
 	mtu = cork->fragsize;
+	if (cork->tx_flags & SKBTX_ANY_SW_TSTAMP &&
+	    sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
+		tskey = sk->sk_tskey++;
 
 	hh_len = LL_RESERVED_SPACE(rt->dst.dev);
 
@@ -976,6 +980,8 @@ alloc_new_skb:
 			/* only the initial fragment is time stamped */
 			skb_shinfo(skb)->tx_flags = cork->tx_flags;
 			cork->tx_flags = 0;
+			skb_shinfo(skb)->tskey = tskey;
+			tskey = 0;
 
 			/*
 			 *	Find where to start putting bytes.
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index f5dafe609f8b..315a55d66079 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1157,6 +1157,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
 	int err;
 	int offset = 0;
 	__u8 tx_flags = 0;
+	u32 tskey = 0;
 
 	if (flags&MSG_PROBE)
 		return 0;
@@ -1272,8 +1273,12 @@ emsgsize:
 		}
 	}
 
-	if (sk->sk_type == SOCK_DGRAM || sk->sk_type == SOCK_RAW)
+	if (sk->sk_type == SOCK_DGRAM || sk->sk_type == SOCK_RAW) {
 		sock_tx_timestamp(sk, &tx_flags);
+		if (tx_flags & SKBTX_ANY_SW_TSTAMP &&
+		    sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
+			tskey = sk->sk_tskey++;
+	}
 
 	/*
 	 * Let's try using as much space as possible.
@@ -1397,6 +1402,8 @@ alloc_new_skb:
 			/* Only the initial fragment is time stamped */
 			skb_shinfo(skb)->tx_flags = tx_flags;
 			tx_flags = 0;
+			skb_shinfo(skb)->tskey = tskey;
+			tskey = 0;
 
 			/*
 			 *	Find where to start putting bytes
-- 
cgit v1.2.3


From e7fd2885385157d46c85f282fc6d7d297db43e1f Mon Sep 17 00:00:00 2001
From: Willem de Bruijn <willemb@google.com>
Date: Mon, 4 Aug 2014 22:11:48 -0400
Subject: net-timestamp: SCHED timestamp on entering packet scheduler

Kernel transmit latency is often incurred in the packet scheduler.
Introduce a new timestamp on transmission just before entering the
scheduler. When data travels through multiple devices (bonding,
tunneling, ...) each device will export an individual timestamp.

Signed-off-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h          | 11 +++++++++--
 include/uapi/linux/errqueue.h   |  1 +
 include/uapi/linux/net_tstamp.h |  3 ++-
 net/core/dev.c                  |  4 ++++
 net/core/skbuff.c               | 16 ++++++++++++----
 net/socket.c                    |  3 +++
 6 files changed, 31 insertions(+), 7 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 0e35b3af7317..50e1e9b3a5a5 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -229,7 +229,7 @@ enum {
 	/* generate hardware time stamp */
 	SKBTX_HW_TSTAMP = 1 << 0,
 
-	/* generate software time stamp */
+	/* generate software time stamp when queueing packet to NIC */
 	SKBTX_SW_TSTAMP = 1 << 1,
 
 	/* device driver is going to provide hardware time stamp */
@@ -247,9 +247,12 @@ enum {
 	 * all frags to avoid possible bad checksum
 	 */
 	SKBTX_SHARED_FRAG = 1 << 5,
+
+	/* generate software time stamp when entering packet scheduling */
+	SKBTX_SCHED_TSTAMP = 1 << 6,
 };
 
-#define SKBTX_ANY_SW_TSTAMP	SKBTX_SW_TSTAMP
+#define SKBTX_ANY_SW_TSTAMP	(SKBTX_SW_TSTAMP | SKBTX_SCHED_TSTAMP)
 #define SKBTX_ANY_TSTAMP	(SKBTX_HW_TSTAMP | SKBTX_ANY_SW_TSTAMP)
 
 /*
@@ -2695,6 +2698,10 @@ static inline bool skb_defer_rx_timestamp(struct sk_buff *skb)
 void skb_complete_tx_timestamp(struct sk_buff *skb,
 			       struct skb_shared_hwtstamps *hwtstamps);
 
+void __skb_tstamp_tx(struct sk_buff *orig_skb,
+		     struct skb_shared_hwtstamps *hwtstamps,
+		     struct sock *sk, int tstype);
+
 /**
  * skb_tstamp_tx - queue clone of skb with send time stamps
  * @orig_skb:	the original outgoing packet
diff --git a/include/uapi/linux/errqueue.h b/include/uapi/linux/errqueue.h
index accee72cae7c..17437cf297b7 100644
--- a/include/uapi/linux/errqueue.h
+++ b/include/uapi/linux/errqueue.h
@@ -39,6 +39,7 @@ struct scm_timestamping {
  */
 enum {
 	SCM_TSTAMP_SND,		/* driver passed skb to NIC, or HW */
+	SCM_TSTAMP_SCHED,	/* data entered the packet scheduler */
 };
 
 #endif /* _UAPI_LINUX_ERRQUEUE_H */
diff --git a/include/uapi/linux/net_tstamp.h b/include/uapi/linux/net_tstamp.h
index 1e861d2e1a31..60733845fcdd 100644
--- a/include/uapi/linux/net_tstamp.h
+++ b/include/uapi/linux/net_tstamp.h
@@ -21,8 +21,9 @@ enum {
 	SOF_TIMESTAMPING_SYS_HARDWARE = (1<<5),
 	SOF_TIMESTAMPING_RAW_HARDWARE = (1<<6),
 	SOF_TIMESTAMPING_OPT_ID = (1<<7),
+	SOF_TIMESTAMPING_TX_SCHED = (1<<8),
 
-	SOF_TIMESTAMPING_LAST = SOF_TIMESTAMPING_OPT_ID,
+	SOF_TIMESTAMPING_LAST = SOF_TIMESTAMPING_TX_SCHED,
 	SOF_TIMESTAMPING_MASK = (SOF_TIMESTAMPING_LAST - 1) |
 				 SOF_TIMESTAMPING_LAST
 };
diff --git a/net/core/dev.c b/net/core/dev.c
index b370230fe1d3..1c15b189c52b 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -132,6 +132,7 @@
 #include <linux/hashtable.h>
 #include <linux/vmalloc.h>
 #include <linux/if_macvlan.h>
+#include <linux/errqueue.h>
 
 #include "net-sysfs.h"
 
@@ -2876,6 +2877,9 @@ static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv)
 
 	skb_reset_mac_header(skb);
 
+	if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_SCHED_TSTAMP))
+		__skb_tstamp_tx(skb, NULL, skb->sk, SCM_TSTAMP_SCHED);
+
 	/* Disable soft irqs for various locks below. Also
 	 * stops preemption for RCU.
 	 */
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 0df4f1d14c5a..9705c0732aab 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -3490,10 +3490,10 @@ int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb)
 }
 EXPORT_SYMBOL(sock_queue_err_skb);
 
-void skb_tstamp_tx(struct sk_buff *orig_skb,
-		struct skb_shared_hwtstamps *hwtstamps)
+void __skb_tstamp_tx(struct sk_buff *orig_skb,
+		     struct skb_shared_hwtstamps *hwtstamps,
+		     struct sock *sk, int tstype)
 {
-	struct sock *sk = orig_skb->sk;
 	struct sock_exterr_skb *serr;
 	struct sk_buff *skb;
 	int err;
@@ -3521,7 +3521,7 @@ void skb_tstamp_tx(struct sk_buff *orig_skb,
 	memset(serr, 0, sizeof(*serr));
 	serr->ee.ee_errno = ENOMSG;
 	serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING;
-	serr->ee.ee_info = SCM_TSTAMP_SND;
+	serr->ee.ee_info = tstype;
 	if (sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
 		serr->ee.ee_data = skb_shinfo(skb)->tskey;
 
@@ -3530,6 +3530,14 @@ void skb_tstamp_tx(struct sk_buff *orig_skb,
 	if (err)
 		kfree_skb(skb);
 }
+EXPORT_SYMBOL_GPL(__skb_tstamp_tx);
+
+void skb_tstamp_tx(struct sk_buff *orig_skb,
+		   struct skb_shared_hwtstamps *hwtstamps)
+{
+	return __skb_tstamp_tx(orig_skb, hwtstamps, orig_skb->sk,
+			       SCM_TSTAMP_SND);
+}
 EXPORT_SYMBOL_GPL(skb_tstamp_tx);
 
 void skb_complete_wifi_ack(struct sk_buff *skb, bool acked)
diff --git a/net/socket.c b/net/socket.c
index 255d9b802723..3a2778d71631 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -617,6 +617,9 @@ void sock_tx_timestamp(struct sock *sk, __u8 *tx_flags)
 		*tx_flags |= SKBTX_HW_TSTAMP;
 	if (sk->sk_tsflags & SOF_TIMESTAMPING_TX_SOFTWARE)
 		*tx_flags |= SKBTX_SW_TSTAMP;
+	if (sk->sk_tsflags & SOF_TIMESTAMPING_TX_SCHED)
+		*tx_flags |= SKBTX_SCHED_TSTAMP;
+
 	if (sock_flag(sk, SOCK_WIFI_STATUS))
 		*tx_flags |= SKBTX_WIFI_STATUS;
 }
-- 
cgit v1.2.3


From e1c8a607b28190cd09a271508aa3025d3c2f312e Mon Sep 17 00:00:00 2001
From: Willem de Bruijn <willemb@google.com>
Date: Mon, 4 Aug 2014 22:11:50 -0400
Subject: net-timestamp: ACK timestamp for bytestreams

Add SOF_TIMESTAMPING_TX_ACK, a request for a tstamp when the last byte
in the send() call is acknowledged. It implements the feature for TCP.

The timestamp is generated when the TCP socket cumulative ACK is moved
beyond the tracked seqno for the first time. The feature ignores SACK
and FACK, because those acknowledge the specific byte, but not
necessarily the entire contents of the buffer up to that byte.

Signed-off-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h          | 7 ++++++-
 include/uapi/linux/errqueue.h   | 1 +
 include/uapi/linux/net_tstamp.h | 3 ++-
 net/ipv4/tcp_input.c            | 6 ++++++
 net/socket.c                    | 2 ++
 5 files changed, 17 insertions(+), 2 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 50e1e9b3a5a5..11c270551d25 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -250,9 +250,14 @@ enum {
 
 	/* generate software time stamp when entering packet scheduling */
 	SKBTX_SCHED_TSTAMP = 1 << 6,
+
+	/* generate software timestamp on peer data acknowledgment */
+	SKBTX_ACK_TSTAMP = 1 << 7,
 };
 
-#define SKBTX_ANY_SW_TSTAMP	(SKBTX_SW_TSTAMP | SKBTX_SCHED_TSTAMP)
+#define SKBTX_ANY_SW_TSTAMP	(SKBTX_SW_TSTAMP    | \
+				 SKBTX_SCHED_TSTAMP | \
+				 SKBTX_ACK_TSTAMP)
 #define SKBTX_ANY_TSTAMP	(SKBTX_HW_TSTAMP | SKBTX_ANY_SW_TSTAMP)
 
 /*
diff --git a/include/uapi/linux/errqueue.h b/include/uapi/linux/errqueue.h
index 17437cf297b7..07bdce1f444a 100644
--- a/include/uapi/linux/errqueue.h
+++ b/include/uapi/linux/errqueue.h
@@ -40,6 +40,7 @@ struct scm_timestamping {
 enum {
 	SCM_TSTAMP_SND,		/* driver passed skb to NIC, or HW */
 	SCM_TSTAMP_SCHED,	/* data entered the packet scheduler */
+	SCM_TSTAMP_ACK,		/* data acknowledged by peer */
 };
 
 #endif /* _UAPI_LINUX_ERRQUEUE_H */
diff --git a/include/uapi/linux/net_tstamp.h b/include/uapi/linux/net_tstamp.h
index 60733845fcdd..ff354021bb69 100644
--- a/include/uapi/linux/net_tstamp.h
+++ b/include/uapi/linux/net_tstamp.h
@@ -22,8 +22,9 @@ enum {
 	SOF_TIMESTAMPING_RAW_HARDWARE = (1<<6),
 	SOF_TIMESTAMPING_OPT_ID = (1<<7),
 	SOF_TIMESTAMPING_TX_SCHED = (1<<8),
+	SOF_TIMESTAMPING_TX_ACK = (1<<9),
 
-	SOF_TIMESTAMPING_LAST = SOF_TIMESTAMPING_TX_SCHED,
+	SOF_TIMESTAMPING_LAST = SOF_TIMESTAMPING_TX_ACK,
 	SOF_TIMESTAMPING_MASK = (SOF_TIMESTAMPING_LAST - 1) |
 				 SOF_TIMESTAMPING_LAST
 };
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 6a2984507755..a3d47af01906 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -74,6 +74,7 @@
 #include <linux/ipsec.h>
 #include <asm/unaligned.h>
 #include <net/netdma.h>
+#include <linux/errqueue.h>
 
 int sysctl_tcp_timestamps __read_mostly = 1;
 int sysctl_tcp_window_scaling __read_mostly = 1;
@@ -3106,6 +3107,11 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
 			tp->retrans_stamp = 0;
 		}
 
+		if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_ACK_TSTAMP) &&
+		    between(skb_shinfo(skb)->tskey, prior_snd_una,
+			    tp->snd_una + 1))
+			__skb_tstamp_tx(skb, NULL, sk, SCM_TSTAMP_ACK);
+
 		if (!fully_acked)
 			break;
 
diff --git a/net/socket.c b/net/socket.c
index 3a2778d71631..ae89569a2db5 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -619,6 +619,8 @@ void sock_tx_timestamp(struct sock *sk, __u8 *tx_flags)
 		*tx_flags |= SKBTX_SW_TSTAMP;
 	if (sk->sk_tsflags & SOF_TIMESTAMPING_TX_SCHED)
 		*tx_flags |= SKBTX_SCHED_TSTAMP;
+	if (sk->sk_tsflags & SOF_TIMESTAMPING_TX_ACK)
+		*tx_flags |= SKBTX_ACK_TSTAMP;
 
 	if (sock_flag(sk, SOCK_WIFI_STATUS))
 		*tx_flags |= SKBTX_WIFI_STATUS;
-- 
cgit v1.2.3


From 753a2ad54ef45e3417a9d49537c2b42b04a2e1be Mon Sep 17 00:00:00 2001
From: Pablo Neira <pablo@netfilter.org>
Date: Thu, 7 Aug 2014 00:17:09 +0200
Subject: net: reallocate new socket option number for IPV6_AUTOFLOWLABEL

cb1ce2e ("ipv6: Implement automatic flow label generation on transmit")
accidentally uses socket option 64, which is already used by ip6tables:

 IP6T_SO_SET_REPLACE / IP6T_SO_GET_INFO               64
 IP6T_SO_SET_ADD_COUNTERS / IP6T_SO_GET_ENTRIES       65

There is comment include/uapi/linux/in6.h warning about that.

Allocate 70 for this, which seems to be unused instead.

Cc: Tom Herbert <therbert@google.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/in6.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/in6.h b/include/uapi/linux/in6.h
index 22b7a69619d8..74a2a1773494 100644
--- a/include/uapi/linux/in6.h
+++ b/include/uapi/linux/in6.h
@@ -233,7 +233,6 @@ struct in6_flowlabel_req {
 #if 0	/* not yet */
 #define IPV6_USE_MIN_MTU	63
 #endif
-#define IPV6_AUTOFLOWLABEL	64
 
 /*
  * Netfilter (1)
@@ -262,6 +261,7 @@ struct in6_flowlabel_req {
  * IP6T_SO_ORIGINAL_DST		80
  */
 
+#define IPV6_AUTOFLOWLABEL	70
 /* RFC5014: Source address selection */
 #define IPV6_ADDR_PREFERENCES	72
 
-- 
cgit v1.2.3


From 40e041a2c858b3caefc757e26cb85bfceae5062b Mon Sep 17 00:00:00 2001
From: David Herrmann <dh.herrmann@gmail.com>
Date: Fri, 8 Aug 2014 14:25:27 -0700
Subject: shm: add sealing API

If two processes share a common memory region, they usually want some
guarantees to allow safe access. This often includes:
  - one side cannot overwrite data while the other reads it
  - one side cannot shrink the buffer while the other accesses it
  - one side cannot grow the buffer beyond previously set boundaries

If there is a trust-relationship between both parties, there is no need
for policy enforcement.  However, if there's no trust relationship (eg.,
for general-purpose IPC) sharing memory-regions is highly fragile and
often not possible without local copies.  Look at the following two
use-cases:

  1) A graphics client wants to share its rendering-buffer with a
     graphics-server. The memory-region is allocated by the client for
     read/write access and a second FD is passed to the server. While
     scanning out from the memory region, the server has no guarantee that
     the client doesn't shrink the buffer at any time, requiring rather
     cumbersome SIGBUS handling.
  2) A process wants to perform an RPC on another process. To avoid huge
     bandwidth consumption, zero-copy is preferred. After a message is
     assembled in-memory and a FD is passed to the remote side, both sides
     want to be sure that neither modifies this shared copy, anymore. The
     source may have put sensible data into the message without a separate
     copy and the target may want to parse the message inline, to avoid a
     local copy.

While SIGBUS handling, POSIX mandatory locking and MAP_DENYWRITE provide
ways to achieve most of this, the first one is unproportionally ugly to
use in libraries and the latter two are broken/racy or even disabled due
to denial of service attacks.

This patch introduces the concept of SEALING.  If you seal a file, a
specific set of operations is blocked on that file forever.  Unlike locks,
seals can only be set, never removed.  Hence, once you verified a specific
set of seals is set, you're guaranteed that no-one can perform the blocked
operations on this file, anymore.

An initial set of SEALS is introduced by this patch:
  - SHRINK: If SEAL_SHRINK is set, the file in question cannot be reduced
            in size. This affects ftruncate() and open(O_TRUNC).
  - GROW: If SEAL_GROW is set, the file in question cannot be increased
          in size. This affects ftruncate(), fallocate() and write().
  - WRITE: If SEAL_WRITE is set, no write operations (besides resizing)
           are possible. This affects fallocate(PUNCH_HOLE), mmap() and
           write().
  - SEAL: If SEAL_SEAL is set, no further seals can be added to a file.
          This basically prevents the F_ADD_SEAL operation on a file and
          can be set to prevent others from adding further seals that you
          don't want.

The described use-cases can easily use these seals to provide safe use
without any trust-relationship:

  1) The graphics server can verify that a passed file-descriptor has
     SEAL_SHRINK set. This allows safe scanout, while the client is
     allowed to increase buffer size for window-resizing on-the-fly.
     Concurrent writes are explicitly allowed.
  2) For general-purpose IPC, both processes can verify that SEAL_SHRINK,
     SEAL_GROW and SEAL_WRITE are set. This guarantees that neither
     process can modify the data while the other side parses it.
     Furthermore, it guarantees that even with writable FDs passed to the
     peer, it cannot increase the size to hit memory-limits of the source
     process (in case the file-storage is accounted to the source).

The new API is an extension to fcntl(), adding two new commands:
  F_GET_SEALS: Return a bitset describing the seals on the file. This
               can be called on any FD if the underlying file supports
               sealing.
  F_ADD_SEALS: Change the seals of a given file. This requires WRITE
               access to the file and F_SEAL_SEAL may not already be set.
               Furthermore, the underlying file must support sealing and
               there may not be any existing shared mapping of that file.
               Otherwise, EBADF/EPERM is returned.
               The given seals are _added_ to the existing set of seals
               on the file. You cannot remove seals again.

The fcntl() handler is currently specific to shmem and disabled on all
files. A file needs to explicitly support sealing for this interface to
work. A separate syscall is added in a follow-up, which creates files that
support sealing. There is no intention to support this on other
file-systems. Semantics are unclear for non-volatile files and we lack any
use-case right now. Therefore, the implementation is specific to shmem.

Signed-off-by: David Herrmann <dh.herrmann@gmail.com>
Acked-by: Hugh Dickins <hughd@google.com>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Cc: Ryan Lortie <desrt@desrt.ca>
Cc: Lennart Poettering <lennart@poettering.net>
Cc: Daniel Mack <zonque@gmail.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/fcntl.c                 |   5 ++
 include/linux/shmem_fs.h   |  17 ++++++
 include/uapi/linux/fcntl.h |  15 +++++
 mm/shmem.c                 | 143 +++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 180 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/fs/fcntl.c b/fs/fcntl.c
index 72c82f69b01b..22d1c3df61ac 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -21,6 +21,7 @@
 #include <linux/rcupdate.h>
 #include <linux/pid_namespace.h>
 #include <linux/user_namespace.h>
+#include <linux/shmem_fs.h>
 
 #include <asm/poll.h>
 #include <asm/siginfo.h>
@@ -336,6 +337,10 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
 	case F_GETPIPE_SZ:
 		err = pipe_fcntl(filp, cmd, arg);
 		break;
+	case F_ADD_SEALS:
+	case F_GET_SEALS:
+		err = shmem_fcntl(filp, cmd, arg);
+		break;
 	default:
 		break;
 	}
diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
index 4d1771c2d29f..50777b5b1e4c 100644
--- a/include/linux/shmem_fs.h
+++ b/include/linux/shmem_fs.h
@@ -1,6 +1,7 @@
 #ifndef __SHMEM_FS_H
 #define __SHMEM_FS_H
 
+#include <linux/file.h>
 #include <linux/swap.h>
 #include <linux/mempolicy.h>
 #include <linux/pagemap.h>
@@ -11,6 +12,7 @@
 
 struct shmem_inode_info {
 	spinlock_t		lock;
+	unsigned int		seals;		/* shmem seals */
 	unsigned long		flags;
 	unsigned long		alloced;	/* data pages alloced to file */
 	union {
@@ -65,4 +67,19 @@ static inline struct page *shmem_read_mapping_page(
 					mapping_gfp_mask(mapping));
 }
 
+#ifdef CONFIG_TMPFS
+
+extern int shmem_add_seals(struct file *file, unsigned int seals);
+extern int shmem_get_seals(struct file *file);
+extern long shmem_fcntl(struct file *file, unsigned int cmd, unsigned long arg);
+
+#else
+
+static inline long shmem_fcntl(struct file *f, unsigned int c, unsigned long a)
+{
+	return -EINVAL;
+}
+
+#endif
+
 #endif
diff --git a/include/uapi/linux/fcntl.h b/include/uapi/linux/fcntl.h
index 074b886c6be0..beed138bd359 100644
--- a/include/uapi/linux/fcntl.h
+++ b/include/uapi/linux/fcntl.h
@@ -27,6 +27,21 @@
 #define F_SETPIPE_SZ	(F_LINUX_SPECIFIC_BASE + 7)
 #define F_GETPIPE_SZ	(F_LINUX_SPECIFIC_BASE + 8)
 
+/*
+ * Set/Get seals
+ */
+#define F_ADD_SEALS	(F_LINUX_SPECIFIC_BASE + 9)
+#define F_GET_SEALS	(F_LINUX_SPECIFIC_BASE + 10)
+
+/*
+ * Types of seals
+ */
+#define F_SEAL_SEAL	0x0001	/* prevent further seals from being set */
+#define F_SEAL_SHRINK	0x0002	/* prevent file from shrinking */
+#define F_SEAL_GROW	0x0004	/* prevent file from growing */
+#define F_SEAL_WRITE	0x0008	/* prevent writes */
+/* (1U << 31) is reserved for signed error codes */
+
 /*
  * Types of directory notifications that may be requested.
  */
diff --git a/mm/shmem.c b/mm/shmem.c
index 6dc80d298f9d..8b43bb7a4efe 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -66,6 +66,7 @@ static struct vfsmount *shm_mnt;
 #include <linux/highmem.h>
 #include <linux/seq_file.h>
 #include <linux/magic.h>
+#include <linux/fcntl.h>
 
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
@@ -547,6 +548,7 @@ EXPORT_SYMBOL_GPL(shmem_truncate_range);
 static int shmem_setattr(struct dentry *dentry, struct iattr *attr)
 {
 	struct inode *inode = dentry->d_inode;
+	struct shmem_inode_info *info = SHMEM_I(inode);
 	int error;
 
 	error = inode_change_ok(inode, attr);
@@ -557,6 +559,11 @@ static int shmem_setattr(struct dentry *dentry, struct iattr *attr)
 		loff_t oldsize = inode->i_size;
 		loff_t newsize = attr->ia_size;
 
+		/* protected by i_mutex */
+		if ((newsize < oldsize && (info->seals & F_SEAL_SHRINK)) ||
+		    (newsize > oldsize && (info->seals & F_SEAL_GROW)))
+			return -EPERM;
+
 		if (newsize != oldsize) {
 			error = shmem_reacct_size(SHMEM_I(inode)->flags,
 					oldsize, newsize);
@@ -1412,6 +1419,7 @@ static struct inode *shmem_get_inode(struct super_block *sb, const struct inode
 		info = SHMEM_I(inode);
 		memset(info, 0, (char *)inode - (char *)info);
 		spin_lock_init(&info->lock);
+		info->seals = F_SEAL_SEAL;
 		info->flags = flags & VM_NORESERVE;
 		INIT_LIST_HEAD(&info->swaplist);
 		simple_xattrs_init(&info->xattrs);
@@ -1470,7 +1478,17 @@ shmem_write_begin(struct file *file, struct address_space *mapping,
 			struct page **pagep, void **fsdata)
 {
 	struct inode *inode = mapping->host;
+	struct shmem_inode_info *info = SHMEM_I(inode);
 	pgoff_t index = pos >> PAGE_CACHE_SHIFT;
+
+	/* i_mutex is held by caller */
+	if (unlikely(info->seals)) {
+		if (info->seals & F_SEAL_WRITE)
+			return -EPERM;
+		if ((info->seals & F_SEAL_GROW) && pos + len > inode->i_size)
+			return -EPERM;
+	}
+
 	return shmem_getpage(inode, index, pagep, SGP_WRITE, NULL);
 }
 
@@ -1808,11 +1826,125 @@ static loff_t shmem_file_llseek(struct file *file, loff_t offset, int whence)
 	return offset;
 }
 
+static int shmem_wait_for_pins(struct address_space *mapping)
+{
+	return 0;
+}
+
+#define F_ALL_SEALS (F_SEAL_SEAL | \
+		     F_SEAL_SHRINK | \
+		     F_SEAL_GROW | \
+		     F_SEAL_WRITE)
+
+int shmem_add_seals(struct file *file, unsigned int seals)
+{
+	struct inode *inode = file_inode(file);
+	struct shmem_inode_info *info = SHMEM_I(inode);
+	int error;
+
+	/*
+	 * SEALING
+	 * Sealing allows multiple parties to share a shmem-file but restrict
+	 * access to a specific subset of file operations. Seals can only be
+	 * added, but never removed. This way, mutually untrusted parties can
+	 * share common memory regions with a well-defined policy. A malicious
+	 * peer can thus never perform unwanted operations on a shared object.
+	 *
+	 * Seals are only supported on special shmem-files and always affect
+	 * the whole underlying inode. Once a seal is set, it may prevent some
+	 * kinds of access to the file. Currently, the following seals are
+	 * defined:
+	 *   SEAL_SEAL: Prevent further seals from being set on this file
+	 *   SEAL_SHRINK: Prevent the file from shrinking
+	 *   SEAL_GROW: Prevent the file from growing
+	 *   SEAL_WRITE: Prevent write access to the file
+	 *
+	 * As we don't require any trust relationship between two parties, we
+	 * must prevent seals from being removed. Therefore, sealing a file
+	 * only adds a given set of seals to the file, it never touches
+	 * existing seals. Furthermore, the "setting seals"-operation can be
+	 * sealed itself, which basically prevents any further seal from being
+	 * added.
+	 *
+	 * Semantics of sealing are only defined on volatile files. Only
+	 * anonymous shmem files support sealing. More importantly, seals are
+	 * never written to disk. Therefore, there's no plan to support it on
+	 * other file types.
+	 */
+
+	if (file->f_op != &shmem_file_operations)
+		return -EINVAL;
+	if (!(file->f_mode & FMODE_WRITE))
+		return -EPERM;
+	if (seals & ~(unsigned int)F_ALL_SEALS)
+		return -EINVAL;
+
+	mutex_lock(&inode->i_mutex);
+
+	if (info->seals & F_SEAL_SEAL) {
+		error = -EPERM;
+		goto unlock;
+	}
+
+	if ((seals & F_SEAL_WRITE) && !(info->seals & F_SEAL_WRITE)) {
+		error = mapping_deny_writable(file->f_mapping);
+		if (error)
+			goto unlock;
+
+		error = shmem_wait_for_pins(file->f_mapping);
+		if (error) {
+			mapping_allow_writable(file->f_mapping);
+			goto unlock;
+		}
+	}
+
+	info->seals |= seals;
+	error = 0;
+
+unlock:
+	mutex_unlock(&inode->i_mutex);
+	return error;
+}
+EXPORT_SYMBOL_GPL(shmem_add_seals);
+
+int shmem_get_seals(struct file *file)
+{
+	if (file->f_op != &shmem_file_operations)
+		return -EINVAL;
+
+	return SHMEM_I(file_inode(file))->seals;
+}
+EXPORT_SYMBOL_GPL(shmem_get_seals);
+
+long shmem_fcntl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+	long error;
+
+	switch (cmd) {
+	case F_ADD_SEALS:
+		/* disallow upper 32bit */
+		if (arg > UINT_MAX)
+			return -EINVAL;
+
+		error = shmem_add_seals(file, arg);
+		break;
+	case F_GET_SEALS:
+		error = shmem_get_seals(file);
+		break;
+	default:
+		error = -EINVAL;
+		break;
+	}
+
+	return error;
+}
+
 static long shmem_fallocate(struct file *file, int mode, loff_t offset,
 							 loff_t len)
 {
 	struct inode *inode = file_inode(file);
 	struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
+	struct shmem_inode_info *info = SHMEM_I(inode);
 	struct shmem_falloc shmem_falloc;
 	pgoff_t start, index, end;
 	int error;
@@ -1828,6 +1960,12 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset,
 		loff_t unmap_end = round_down(offset + len, PAGE_SIZE) - 1;
 		DECLARE_WAIT_QUEUE_HEAD_ONSTACK(shmem_falloc_waitq);
 
+		/* protected by i_mutex */
+		if (info->seals & F_SEAL_WRITE) {
+			error = -EPERM;
+			goto out;
+		}
+
 		shmem_falloc.waitq = &shmem_falloc_waitq;
 		shmem_falloc.start = unmap_start >> PAGE_SHIFT;
 		shmem_falloc.next = (unmap_end + 1) >> PAGE_SHIFT;
@@ -1854,6 +1992,11 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset,
 	if (error)
 		goto out;
 
+	if ((info->seals & F_SEAL_GROW) && offset + len > inode->i_size) {
+		error = -EPERM;
+		goto out;
+	}
+
 	start = offset >> PAGE_CACHE_SHIFT;
 	end = (offset + len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
 	/* Try to avoid a swapstorm if len is impossible to satisfy */
-- 
cgit v1.2.3


From 9183df25fe7b194563db3fec6dc3202a5855839c Mon Sep 17 00:00:00 2001
From: David Herrmann <dh.herrmann@gmail.com>
Date: Fri, 8 Aug 2014 14:25:29 -0700
Subject: shm: add memfd_create() syscall

memfd_create() is similar to mmap(MAP_ANON), but returns a file-descriptor
that you can pass to mmap().  It can support sealing and avoids any
connection to user-visible mount-points.  Thus, it's not subject to quotas
on mounted file-systems, but can be used like malloc()'ed memory, but with
a file-descriptor to it.

memfd_create() returns the raw shmem file, so calls like ftruncate() can
be used to modify the underlying inode.  Also calls like fstat() will
return proper information and mark the file as regular file.  If you want
sealing, you can specify MFD_ALLOW_SEALING.  Otherwise, sealing is not
supported (like on all other regular files).

Compared to O_TMPFILE, it does not require a tmpfs mount-point and is not
subject to a filesystem size limit.  It is still properly accounted to
memcg limits, though, and to the same overcommit or no-overcommit
accounting as all user memory.

Signed-off-by: David Herrmann <dh.herrmann@gmail.com>
Acked-by: Hugh Dickins <hughd@google.com>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Cc: Ryan Lortie <desrt@desrt.ca>
Cc: Lennart Poettering <lennart@poettering.net>
Cc: Daniel Mack <zonque@gmail.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/syscalls/syscall_32.tbl |  1 +
 arch/x86/syscalls/syscall_64.tbl |  1 +
 include/linux/syscalls.h         |  1 +
 include/uapi/linux/memfd.h       |  8 +++++
 kernel/sys_ni.c                  |  1 +
 mm/shmem.c                       | 73 ++++++++++++++++++++++++++++++++++++++++
 6 files changed, 85 insertions(+)
 create mode 100644 include/uapi/linux/memfd.h

(limited to 'include/uapi/linux')

diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl
index d1b4a119d4a5..028b78168d85 100644
--- a/arch/x86/syscalls/syscall_32.tbl
+++ b/arch/x86/syscalls/syscall_32.tbl
@@ -362,3 +362,4 @@
 353	i386	renameat2		sys_renameat2
 354	i386	seccomp			sys_seccomp
 355	i386	getrandom		sys_getrandom
+356	i386	memfd_create		sys_memfd_create
diff --git a/arch/x86/syscalls/syscall_64.tbl b/arch/x86/syscalls/syscall_64.tbl
index 252c804bb1aa..ca2b9aa78c81 100644
--- a/arch/x86/syscalls/syscall_64.tbl
+++ b/arch/x86/syscalls/syscall_64.tbl
@@ -325,6 +325,7 @@
 316	common	renameat2		sys_renameat2
 317	common	seccomp			sys_seccomp
 318	common	getrandom		sys_getrandom
+319	common	memfd_create		sys_memfd_create
 
 #
 # x32-specific system call numbers start at 512 to avoid cache impact
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 701daff5d899..15a069425cbf 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -802,6 +802,7 @@ asmlinkage long sys_timerfd_settime(int ufd, int flags,
 asmlinkage long sys_timerfd_gettime(int ufd, struct itimerspec __user *otmr);
 asmlinkage long sys_eventfd(unsigned int count);
 asmlinkage long sys_eventfd2(unsigned int count, int flags);
+asmlinkage long sys_memfd_create(const char __user *uname_ptr, unsigned int flags);
 asmlinkage long sys_fallocate(int fd, int mode, loff_t offset, loff_t len);
 asmlinkage long sys_old_readdir(unsigned int, struct old_linux_dirent __user *, unsigned int);
 asmlinkage long sys_pselect6(int, fd_set __user *, fd_set __user *,
diff --git a/include/uapi/linux/memfd.h b/include/uapi/linux/memfd.h
new file mode 100644
index 000000000000..534e364bda92
--- /dev/null
+++ b/include/uapi/linux/memfd.h
@@ -0,0 +1,8 @@
+#ifndef _UAPI_LINUX_MEMFD_H
+#define _UAPI_LINUX_MEMFD_H
+
+/* flags for memfd_create(2) (unsigned int) */
+#define MFD_CLOEXEC		0x0001U
+#define MFD_ALLOW_SEALING	0x0002U
+
+#endif /* _UAPI_LINUX_MEMFD_H */
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index 2904a2105914..1f79e3714533 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -197,6 +197,7 @@ cond_syscall(compat_sys_timerfd_settime);
 cond_syscall(compat_sys_timerfd_gettime);
 cond_syscall(sys_eventfd);
 cond_syscall(sys_eventfd2);
+cond_syscall(sys_memfd_create);
 
 /* performance counters: */
 cond_syscall(sys_perf_event_open);
diff --git a/mm/shmem.c b/mm/shmem.c
index 8b43bb7a4efe..4a5498795a2b 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -66,7 +66,9 @@ static struct vfsmount *shm_mnt;
 #include <linux/highmem.h>
 #include <linux/seq_file.h>
 #include <linux/magic.h>
+#include <linux/syscalls.h>
 #include <linux/fcntl.h>
+#include <uapi/linux/memfd.h>
 
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
@@ -2732,6 +2734,77 @@ static int shmem_show_options(struct seq_file *seq, struct dentry *root)
 	shmem_show_mpol(seq, sbinfo->mpol);
 	return 0;
 }
+
+#define MFD_NAME_PREFIX "memfd:"
+#define MFD_NAME_PREFIX_LEN (sizeof(MFD_NAME_PREFIX) - 1)
+#define MFD_NAME_MAX_LEN (NAME_MAX - MFD_NAME_PREFIX_LEN)
+
+#define MFD_ALL_FLAGS (MFD_CLOEXEC | MFD_ALLOW_SEALING)
+
+SYSCALL_DEFINE2(memfd_create,
+		const char __user *, uname,
+		unsigned int, flags)
+{
+	struct shmem_inode_info *info;
+	struct file *file;
+	int fd, error;
+	char *name;
+	long len;
+
+	if (flags & ~(unsigned int)MFD_ALL_FLAGS)
+		return -EINVAL;
+
+	/* length includes terminating zero */
+	len = strnlen_user(uname, MFD_NAME_MAX_LEN + 1);
+	if (len <= 0)
+		return -EFAULT;
+	if (len > MFD_NAME_MAX_LEN + 1)
+		return -EINVAL;
+
+	name = kmalloc(len + MFD_NAME_PREFIX_LEN, GFP_TEMPORARY);
+	if (!name)
+		return -ENOMEM;
+
+	strcpy(name, MFD_NAME_PREFIX);
+	if (copy_from_user(&name[MFD_NAME_PREFIX_LEN], uname, len)) {
+		error = -EFAULT;
+		goto err_name;
+	}
+
+	/* terminating-zero may have changed after strnlen_user() returned */
+	if (name[len + MFD_NAME_PREFIX_LEN - 1]) {
+		error = -EFAULT;
+		goto err_name;
+	}
+
+	fd = get_unused_fd_flags((flags & MFD_CLOEXEC) ? O_CLOEXEC : 0);
+	if (fd < 0) {
+		error = fd;
+		goto err_name;
+	}
+
+	file = shmem_file_setup(name, 0, VM_NORESERVE);
+	if (IS_ERR(file)) {
+		error = PTR_ERR(file);
+		goto err_fd;
+	}
+	info = SHMEM_I(file_inode(file));
+	file->f_mode |= FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE;
+	file->f_flags |= O_RDWR | O_LARGEFILE;
+	if (flags & MFD_ALLOW_SEALING)
+		info->seals &= ~F_SEAL_SEAL;
+
+	fd_install(fd, file);
+	kfree(name);
+	return fd;
+
+err_fd:
+	put_unused_fd(fd);
+err_name:
+	kfree(name);
+	return error;
+}
+
 #endif /* CONFIG_TMPFS */
 
 static void shmem_put_super(struct super_block *sb)
-- 
cgit v1.2.3


From cb1052581e2bddd6096544f3f944f4e7fdad4c7f Mon Sep 17 00:00:00 2001
From: Vivek Goyal <vgoyal@redhat.com>
Date: Fri, 8 Aug 2014 14:25:57 -0700
Subject: kexec: implementation of new syscall kexec_file_load

Previous patch provided the interface definition and this patch prvides
implementation of new syscall.

Previously segment list was prepared in user space.  Now user space just
passes kernel fd, initrd fd and command line and kernel will create a
segment list internally.

This patch contains generic part of the code.  Actual segment preparation
and loading is done by arch and image specific loader.  Which comes in
next patch.

[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
Cc: Borislav Petkov <bp@suse.de>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: Eric Biederman <ebiederm@xmission.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Matthew Garrett <mjg59@srcf.ucam.org>
Cc: Greg Kroah-Hartman <greg@kroah.com>
Cc: Dave Young <dyoung@redhat.com>
Cc: WANG Chao <chaowang@redhat.com>
Cc: Baoquan He <bhe@redhat.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/kernel/machine_kexec_64.c |  45 ++++
 include/linux/kexec.h              |  53 ++++
 include/uapi/linux/kexec.h         |  11 +
 kernel/kexec.c                     | 483 ++++++++++++++++++++++++++++++++++++-
 4 files changed, 587 insertions(+), 5 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index 679cef0791cd..c8875b5545e1 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -22,6 +22,10 @@
 #include <asm/mmu_context.h>
 #include <asm/debugreg.h>
 
+static struct kexec_file_ops *kexec_file_loaders[] = {
+		NULL,
+};
+
 static void free_transition_pgtable(struct kimage *image)
 {
 	free_page((unsigned long)image->arch.pud);
@@ -283,3 +287,44 @@ void arch_crash_save_vmcoreinfo(void)
 			      (unsigned long)&_text - __START_KERNEL);
 }
 
+/* arch-dependent functionality related to kexec file-based syscall */
+
+int arch_kexec_kernel_image_probe(struct kimage *image, void *buf,
+				  unsigned long buf_len)
+{
+	int i, ret = -ENOEXEC;
+	struct kexec_file_ops *fops;
+
+	for (i = 0; i < ARRAY_SIZE(kexec_file_loaders); i++) {
+		fops = kexec_file_loaders[i];
+		if (!fops || !fops->probe)
+			continue;
+
+		ret = fops->probe(buf, buf_len);
+		if (!ret) {
+			image->fops = fops;
+			return ret;
+		}
+	}
+
+	return ret;
+}
+
+void *arch_kexec_kernel_image_load(struct kimage *image)
+{
+	if (!image->fops || !image->fops->load)
+		return ERR_PTR(-ENOEXEC);
+
+	return image->fops->load(image, image->kernel_buf,
+				 image->kernel_buf_len, image->initrd_buf,
+				 image->initrd_buf_len, image->cmdline_buf,
+				 image->cmdline_buf_len);
+}
+
+int arch_kimage_file_post_load_cleanup(struct kimage *image)
+{
+	if (!image->fops || !image->fops->cleanup)
+		return 0;
+
+	return image->fops->cleanup(image);
+}
diff --git a/include/linux/kexec.h b/include/linux/kexec.h
index 66d56ac0f64c..8e80901e466f 100644
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h
@@ -121,13 +121,57 @@ struct kimage {
 #define KEXEC_TYPE_DEFAULT 0
 #define KEXEC_TYPE_CRASH   1
 	unsigned int preserve_context : 1;
+	/* If set, we are using file mode kexec syscall */
+	unsigned int file_mode:1;
 
 #ifdef ARCH_HAS_KIMAGE_ARCH
 	struct kimage_arch arch;
 #endif
+
+	/* Additional fields for file based kexec syscall */
+	void *kernel_buf;
+	unsigned long kernel_buf_len;
+
+	void *initrd_buf;
+	unsigned long initrd_buf_len;
+
+	char *cmdline_buf;
+	unsigned long cmdline_buf_len;
+
+	/* File operations provided by image loader */
+	struct kexec_file_ops *fops;
+
+	/* Image loader handling the kernel can store a pointer here */
+	void *image_loader_data;
 };
 
+/*
+ * Keeps track of buffer parameters as provided by caller for requesting
+ * memory placement of buffer.
+ */
+struct kexec_buf {
+	struct kimage *image;
+	char *buffer;
+	unsigned long bufsz;
+	unsigned long memsz;
+	unsigned long buf_align;
+	unsigned long buf_min;
+	unsigned long buf_max;
+	bool top_down;		/* allocate from top of memory hole */
+};
 
+typedef int (kexec_probe_t)(const char *kernel_buf, unsigned long kernel_size);
+typedef void *(kexec_load_t)(struct kimage *image, char *kernel_buf,
+			     unsigned long kernel_len, char *initrd,
+			     unsigned long initrd_len, char *cmdline,
+			     unsigned long cmdline_len);
+typedef int (kexec_cleanup_t)(struct kimage *image);
+
+struct kexec_file_ops {
+	kexec_probe_t *probe;
+	kexec_load_t *load;
+	kexec_cleanup_t *cleanup;
+};
 
 /* kexec interface functions */
 extern void machine_kexec(struct kimage *image);
@@ -138,6 +182,11 @@ extern asmlinkage long sys_kexec_load(unsigned long entry,
 					struct kexec_segment __user *segments,
 					unsigned long flags);
 extern int kernel_kexec(void);
+extern int kexec_add_buffer(struct kimage *image, char *buffer,
+			    unsigned long bufsz, unsigned long memsz,
+			    unsigned long buf_align, unsigned long buf_min,
+			    unsigned long buf_max, bool top_down,
+			    unsigned long *load_addr);
 extern struct page *kimage_alloc_control_pages(struct kimage *image,
 						unsigned int order);
 extern void crash_kexec(struct pt_regs *);
@@ -188,6 +237,10 @@ extern int kexec_load_disabled;
 #define KEXEC_FLAGS    (KEXEC_ON_CRASH | KEXEC_PRESERVE_CONTEXT)
 #endif
 
+/* List of defined/legal kexec file flags */
+#define KEXEC_FILE_FLAGS	(KEXEC_FILE_UNLOAD | KEXEC_FILE_ON_CRASH | \
+				 KEXEC_FILE_NO_INITRAMFS)
+
 #define VMCOREINFO_BYTES           (4096)
 #define VMCOREINFO_NOTE_NAME       "VMCOREINFO"
 #define VMCOREINFO_NOTE_NAME_BYTES ALIGN(sizeof(VMCOREINFO_NOTE_NAME), 4)
diff --git a/include/uapi/linux/kexec.h b/include/uapi/linux/kexec.h
index d6629d49a243..6925f5b42f89 100644
--- a/include/uapi/linux/kexec.h
+++ b/include/uapi/linux/kexec.h
@@ -13,6 +13,17 @@
 #define KEXEC_PRESERVE_CONTEXT	0x00000002
 #define KEXEC_ARCH_MASK		0xffff0000
 
+/*
+ * Kexec file load interface flags.
+ * KEXEC_FILE_UNLOAD : Unload already loaded kexec/kdump image.
+ * KEXEC_FILE_ON_CRASH : Load/unload operation belongs to kdump image.
+ * KEXEC_FILE_NO_INITRAMFS : No initramfs is being loaded. Ignore the initrd
+ *                           fd field.
+ */
+#define KEXEC_FILE_UNLOAD	0x00000001
+#define KEXEC_FILE_ON_CRASH	0x00000002
+#define KEXEC_FILE_NO_INITRAMFS	0x00000004
+
 /* These values match the ELF architecture values.
  * Unless there is a good reason that should continue to be the case.
  */
diff --git a/kernel/kexec.c b/kernel/kexec.c
index ec4386c1b94f..9b46219254dd 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -6,6 +6,8 @@
  * Version 2.  See the file COPYING for more details.
  */
 
+#define pr_fmt(fmt)	"kexec: " fmt
+
 #include <linux/capability.h>
 #include <linux/mm.h>
 #include <linux/file.h>
@@ -327,6 +329,221 @@ out_free_image:
 	return ret;
 }
 
+static int copy_file_from_fd(int fd, void **buf, unsigned long *buf_len)
+{
+	struct fd f = fdget(fd);
+	int ret;
+	struct kstat stat;
+	loff_t pos;
+	ssize_t bytes = 0;
+
+	if (!f.file)
+		return -EBADF;
+
+	ret = vfs_getattr(&f.file->f_path, &stat);
+	if (ret)
+		goto out;
+
+	if (stat.size > INT_MAX) {
+		ret = -EFBIG;
+		goto out;
+	}
+
+	/* Don't hand 0 to vmalloc, it whines. */
+	if (stat.size == 0) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	*buf = vmalloc(stat.size);
+	if (!*buf) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	pos = 0;
+	while (pos < stat.size) {
+		bytes = kernel_read(f.file, pos, (char *)(*buf) + pos,
+				    stat.size - pos);
+		if (bytes < 0) {
+			vfree(*buf);
+			ret = bytes;
+			goto out;
+		}
+
+		if (bytes == 0)
+			break;
+		pos += bytes;
+	}
+
+	if (pos != stat.size) {
+		ret = -EBADF;
+		vfree(*buf);
+		goto out;
+	}
+
+	*buf_len = pos;
+out:
+	fdput(f);
+	return ret;
+}
+
+/* Architectures can provide this probe function */
+int __weak arch_kexec_kernel_image_probe(struct kimage *image, void *buf,
+					 unsigned long buf_len)
+{
+	return -ENOEXEC;
+}
+
+void * __weak arch_kexec_kernel_image_load(struct kimage *image)
+{
+	return ERR_PTR(-ENOEXEC);
+}
+
+void __weak arch_kimage_file_post_load_cleanup(struct kimage *image)
+{
+}
+
+/*
+ * Free up memory used by kernel, initrd, and comand line. This is temporary
+ * memory allocation which is not needed any more after these buffers have
+ * been loaded into separate segments and have been copied elsewhere.
+ */
+static void kimage_file_post_load_cleanup(struct kimage *image)
+{
+	vfree(image->kernel_buf);
+	image->kernel_buf = NULL;
+
+	vfree(image->initrd_buf);
+	image->initrd_buf = NULL;
+
+	kfree(image->cmdline_buf);
+	image->cmdline_buf = NULL;
+
+	/* See if architecture has anything to cleanup post load */
+	arch_kimage_file_post_load_cleanup(image);
+}
+
+/*
+ * In file mode list of segments is prepared by kernel. Copy relevant
+ * data from user space, do error checking, prepare segment list
+ */
+static int
+kimage_file_prepare_segments(struct kimage *image, int kernel_fd, int initrd_fd,
+			     const char __user *cmdline_ptr,
+			     unsigned long cmdline_len, unsigned flags)
+{
+	int ret = 0;
+	void *ldata;
+
+	ret = copy_file_from_fd(kernel_fd, &image->kernel_buf,
+				&image->kernel_buf_len);
+	if (ret)
+		return ret;
+
+	/* Call arch image probe handlers */
+	ret = arch_kexec_kernel_image_probe(image, image->kernel_buf,
+					    image->kernel_buf_len);
+
+	if (ret)
+		goto out;
+
+	/* It is possible that there no initramfs is being loaded */
+	if (!(flags & KEXEC_FILE_NO_INITRAMFS)) {
+		ret = copy_file_from_fd(initrd_fd, &image->initrd_buf,
+					&image->initrd_buf_len);
+		if (ret)
+			goto out;
+	}
+
+	if (cmdline_len) {
+		image->cmdline_buf = kzalloc(cmdline_len, GFP_KERNEL);
+		if (!image->cmdline_buf) {
+			ret = -ENOMEM;
+			goto out;
+		}
+
+		ret = copy_from_user(image->cmdline_buf, cmdline_ptr,
+				     cmdline_len);
+		if (ret) {
+			ret = -EFAULT;
+			goto out;
+		}
+
+		image->cmdline_buf_len = cmdline_len;
+
+		/* command line should be a string with last byte null */
+		if (image->cmdline_buf[cmdline_len - 1] != '\0') {
+			ret = -EINVAL;
+			goto out;
+		}
+	}
+
+	/* Call arch image load handlers */
+	ldata = arch_kexec_kernel_image_load(image);
+
+	if (IS_ERR(ldata)) {
+		ret = PTR_ERR(ldata);
+		goto out;
+	}
+
+	image->image_loader_data = ldata;
+out:
+	/* In case of error, free up all allocated memory in this function */
+	if (ret)
+		kimage_file_post_load_cleanup(image);
+	return ret;
+}
+
+static int
+kimage_file_alloc_init(struct kimage **rimage, int kernel_fd,
+		       int initrd_fd, const char __user *cmdline_ptr,
+		       unsigned long cmdline_len, unsigned long flags)
+{
+	int ret;
+	struct kimage *image;
+
+	image = do_kimage_alloc_init();
+	if (!image)
+		return -ENOMEM;
+
+	image->file_mode = 1;
+
+	ret = kimage_file_prepare_segments(image, kernel_fd, initrd_fd,
+					   cmdline_ptr, cmdline_len, flags);
+	if (ret)
+		goto out_free_image;
+
+	ret = sanity_check_segment_list(image);
+	if (ret)
+		goto out_free_post_load_bufs;
+
+	ret = -ENOMEM;
+	image->control_code_page = kimage_alloc_control_pages(image,
+					   get_order(KEXEC_CONTROL_PAGE_SIZE));
+	if (!image->control_code_page) {
+		pr_err("Could not allocate control_code_buffer\n");
+		goto out_free_post_load_bufs;
+	}
+
+	image->swap_page = kimage_alloc_control_pages(image, 0);
+	if (!image->swap_page) {
+		pr_err(KERN_ERR "Could not allocate swap buffer\n");
+		goto out_free_control_pages;
+	}
+
+	*rimage = image;
+	return 0;
+out_free_control_pages:
+	kimage_free_page_list(&image->control_pages);
+out_free_post_load_bufs:
+	kimage_file_post_load_cleanup(image);
+	kfree(image->image_loader_data);
+out_free_image:
+	kfree(image);
+	return ret;
+}
+
 static int kimage_is_destination_range(struct kimage *image,
 					unsigned long start,
 					unsigned long end)
@@ -644,6 +861,16 @@ static void kimage_free(struct kimage *image)
 
 	/* Free the kexec control pages... */
 	kimage_free_page_list(&image->control_pages);
+
+	kfree(image->image_loader_data);
+
+	/*
+	 * Free up any temporary buffers allocated. This might hit if
+	 * error occurred much later after buffer allocation.
+	 */
+	if (image->file_mode)
+		kimage_file_post_load_cleanup(image);
+
 	kfree(image);
 }
 
@@ -772,10 +999,14 @@ static int kimage_load_normal_segment(struct kimage *image,
 	unsigned long maddr;
 	size_t ubytes, mbytes;
 	int result;
-	unsigned char __user *buf;
+	unsigned char __user *buf = NULL;
+	unsigned char *kbuf = NULL;
 
 	result = 0;
-	buf = segment->buf;
+	if (image->file_mode)
+		kbuf = segment->kbuf;
+	else
+		buf = segment->buf;
 	ubytes = segment->bufsz;
 	mbytes = segment->memsz;
 	maddr = segment->mem;
@@ -807,7 +1038,11 @@ static int kimage_load_normal_segment(struct kimage *image,
 				PAGE_SIZE - (maddr & ~PAGE_MASK));
 		uchunk = min(ubytes, mchunk);
 
-		result = copy_from_user(ptr, buf, uchunk);
+		/* For file based kexec, source pages are in kernel memory */
+		if (image->file_mode)
+			memcpy(ptr, kbuf, uchunk);
+		else
+			result = copy_from_user(ptr, buf, uchunk);
 		kunmap(page);
 		if (result) {
 			result = -EFAULT;
@@ -815,7 +1050,10 @@ static int kimage_load_normal_segment(struct kimage *image,
 		}
 		ubytes -= uchunk;
 		maddr  += mchunk;
-		buf    += mchunk;
+		if (image->file_mode)
+			kbuf += mchunk;
+		else
+			buf += mchunk;
 		mbytes -= mchunk;
 	}
 out:
@@ -1062,7 +1300,72 @@ SYSCALL_DEFINE5(kexec_file_load, int, kernel_fd, int, initrd_fd,
 		unsigned long, cmdline_len, const char __user *, cmdline_ptr,
 		unsigned long, flags)
 {
-	return -ENOSYS;
+	int ret = 0, i;
+	struct kimage **dest_image, *image;
+
+	/* We only trust the superuser with rebooting the system. */
+	if (!capable(CAP_SYS_BOOT) || kexec_load_disabled)
+		return -EPERM;
+
+	/* Make sure we have a legal set of flags */
+	if (flags != (flags & KEXEC_FILE_FLAGS))
+		return -EINVAL;
+
+	image = NULL;
+
+	if (!mutex_trylock(&kexec_mutex))
+		return -EBUSY;
+
+	dest_image = &kexec_image;
+	if (flags & KEXEC_FILE_ON_CRASH)
+		dest_image = &kexec_crash_image;
+
+	if (flags & KEXEC_FILE_UNLOAD)
+		goto exchange;
+
+	/*
+	 * In case of crash, new kernel gets loaded in reserved region. It is
+	 * same memory where old crash kernel might be loaded. Free any
+	 * current crash dump kernel before we corrupt it.
+	 */
+	if (flags & KEXEC_FILE_ON_CRASH)
+		kimage_free(xchg(&kexec_crash_image, NULL));
+
+	ret = kimage_file_alloc_init(&image, kernel_fd, initrd_fd, cmdline_ptr,
+				     cmdline_len, flags);
+	if (ret)
+		goto out;
+
+	ret = machine_kexec_prepare(image);
+	if (ret)
+		goto out;
+
+	for (i = 0; i < image->nr_segments; i++) {
+		struct kexec_segment *ksegment;
+
+		ksegment = &image->segment[i];
+		pr_debug("Loading segment %d: buf=0x%p bufsz=0x%zx mem=0x%lx memsz=0x%zx\n",
+			 i, ksegment->buf, ksegment->bufsz, ksegment->mem,
+			 ksegment->memsz);
+
+		ret = kimage_load_segment(image, &image->segment[i]);
+		if (ret)
+			goto out;
+	}
+
+	kimage_terminate(image);
+
+	/*
+	 * Free up any temporary buffers allocated which are not needed
+	 * after image has been loaded
+	 */
+	kimage_file_post_load_cleanup(image);
+exchange:
+	image = xchg(dest_image, image);
+out:
+	mutex_unlock(&kexec_mutex);
+	kimage_free(image);
+	return ret;
 }
 
 void crash_kexec(struct pt_regs *regs)
@@ -1620,6 +1923,176 @@ static int __init crash_save_vmcoreinfo_init(void)
 
 subsys_initcall(crash_save_vmcoreinfo_init);
 
+static int __kexec_add_segment(struct kimage *image, char *buf,
+			       unsigned long bufsz, unsigned long mem,
+			       unsigned long memsz)
+{
+	struct kexec_segment *ksegment;
+
+	ksegment = &image->segment[image->nr_segments];
+	ksegment->kbuf = buf;
+	ksegment->bufsz = bufsz;
+	ksegment->mem = mem;
+	ksegment->memsz = memsz;
+	image->nr_segments++;
+
+	return 0;
+}
+
+static int locate_mem_hole_top_down(unsigned long start, unsigned long end,
+				    struct kexec_buf *kbuf)
+{
+	struct kimage *image = kbuf->image;
+	unsigned long temp_start, temp_end;
+
+	temp_end = min(end, kbuf->buf_max);
+	temp_start = temp_end - kbuf->memsz;
+
+	do {
+		/* align down start */
+		temp_start = temp_start & (~(kbuf->buf_align - 1));
+
+		if (temp_start < start || temp_start < kbuf->buf_min)
+			return 0;
+
+		temp_end = temp_start + kbuf->memsz - 1;
+
+		/*
+		 * Make sure this does not conflict with any of existing
+		 * segments
+		 */
+		if (kimage_is_destination_range(image, temp_start, temp_end)) {
+			temp_start = temp_start - PAGE_SIZE;
+			continue;
+		}
+
+		/* We found a suitable memory range */
+		break;
+	} while (1);
+
+	/* If we are here, we found a suitable memory range */
+	__kexec_add_segment(image, kbuf->buffer, kbuf->bufsz, temp_start,
+			    kbuf->memsz);
+
+	/* Success, stop navigating through remaining System RAM ranges */
+	return 1;
+}
+
+static int locate_mem_hole_bottom_up(unsigned long start, unsigned long end,
+				     struct kexec_buf *kbuf)
+{
+	struct kimage *image = kbuf->image;
+	unsigned long temp_start, temp_end;
+
+	temp_start = max(start, kbuf->buf_min);
+
+	do {
+		temp_start = ALIGN(temp_start, kbuf->buf_align);
+		temp_end = temp_start + kbuf->memsz - 1;
+
+		if (temp_end > end || temp_end > kbuf->buf_max)
+			return 0;
+		/*
+		 * Make sure this does not conflict with any of existing
+		 * segments
+		 */
+		if (kimage_is_destination_range(image, temp_start, temp_end)) {
+			temp_start = temp_start + PAGE_SIZE;
+			continue;
+		}
+
+		/* We found a suitable memory range */
+		break;
+	} while (1);
+
+	/* If we are here, we found a suitable memory range */
+	__kexec_add_segment(image, kbuf->buffer, kbuf->bufsz, temp_start,
+			    kbuf->memsz);
+
+	/* Success, stop navigating through remaining System RAM ranges */
+	return 1;
+}
+
+static int locate_mem_hole_callback(u64 start, u64 end, void *arg)
+{
+	struct kexec_buf *kbuf = (struct kexec_buf *)arg;
+	unsigned long sz = end - start + 1;
+
+	/* Returning 0 will take to next memory range */
+	if (sz < kbuf->memsz)
+		return 0;
+
+	if (end < kbuf->buf_min || start > kbuf->buf_max)
+		return 0;
+
+	/*
+	 * Allocate memory top down with-in ram range. Otherwise bottom up
+	 * allocation.
+	 */
+	if (kbuf->top_down)
+		return locate_mem_hole_top_down(start, end, kbuf);
+	return locate_mem_hole_bottom_up(start, end, kbuf);
+}
+
+/*
+ * Helper function for placing a buffer in a kexec segment. This assumes
+ * that kexec_mutex is held.
+ */
+int kexec_add_buffer(struct kimage *image, char *buffer, unsigned long bufsz,
+		     unsigned long memsz, unsigned long buf_align,
+		     unsigned long buf_min, unsigned long buf_max,
+		     bool top_down, unsigned long *load_addr)
+{
+
+	struct kexec_segment *ksegment;
+	struct kexec_buf buf, *kbuf;
+	int ret;
+
+	/* Currently adding segment this way is allowed only in file mode */
+	if (!image->file_mode)
+		return -EINVAL;
+
+	if (image->nr_segments >= KEXEC_SEGMENT_MAX)
+		return -EINVAL;
+
+	/*
+	 * Make sure we are not trying to add buffer after allocating
+	 * control pages. All segments need to be placed first before
+	 * any control pages are allocated. As control page allocation
+	 * logic goes through list of segments to make sure there are
+	 * no destination overlaps.
+	 */
+	if (!list_empty(&image->control_pages)) {
+		WARN_ON(1);
+		return -EINVAL;
+	}
+
+	memset(&buf, 0, sizeof(struct kexec_buf));
+	kbuf = &buf;
+	kbuf->image = image;
+	kbuf->buffer = buffer;
+	kbuf->bufsz = bufsz;
+
+	kbuf->memsz = ALIGN(memsz, PAGE_SIZE);
+	kbuf->buf_align = max(buf_align, PAGE_SIZE);
+	kbuf->buf_min = buf_min;
+	kbuf->buf_max = buf_max;
+	kbuf->top_down = top_down;
+
+	/* Walk the RAM ranges and allocate a suitable range for the buffer */
+	ret = walk_system_ram_res(0, -1, kbuf, locate_mem_hole_callback);
+	if (ret != 1) {
+		/* A suitable memory range could not be found for buffer */
+		return -EADDRNOTAVAIL;
+	}
+
+	/* Found a suitable memory range */
+	ksegment = &image->segment[image->nr_segments - 1];
+	*load_addr = ksegment->mem;
+	return 0;
+}
+
+
 /*
  * Move into place and start executing a preloaded standalone
  * executable.  If nothing was preloaded return an error.
-- 
cgit v1.2.3