From 4749c3ef854e3a5d3dd3cc0ccd2dcb7e05d583bd Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 30 Apr 2015 12:12:00 +0200 Subject: net: sched: remove TC_MUNGED bits Not used. pedit sets TC_MUNGED when packet content was altered, but all the core does is unset MUNGED again and then set OK2MUNGE. And the latter isn't tested anywhere. So lets remove both TC_MUNGED and TC_OK2MUNGE. Signed-off-by: Florian Westphal Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/uapi/linux/pkt_cls.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index bf08e76bf505..6810ca43a80a 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -35,6 +35,8 @@ bits 9,10,11: redirect counter - redirect TTL. Loop avoidance * * */ +#ifndef __KERNEL__ +/* backwards compat for userspace only */ #define TC_MUNGED _TC_MAKEMASK1(0) #define SET_TC_MUNGED(v) ( TC_MUNGED | (v & ~TC_MUNGED)) #define CLR_TC_MUNGED(v) ( v & ~TC_MUNGED) @@ -42,6 +44,7 @@ bits 9,10,11: redirect counter - redirect TTL. Loop avoidance #define TC_OK2MUNGE _TC_MAKEMASK1(1) #define SET_TC_OK2MUNGE(v) ( TC_OK2MUNGE | (v & ~TC_OK2MUNGE)) #define CLR_TC_OK2MUNGE(v) ( v & ~TC_OK2MUNGE) +#endif #define S_TC_VERD _TC_MAKE32(2) #define M_TC_VERD _TC_MAKEMASK(4,S_TC_VERD) -- cgit v1.2.3 From c19ae86a510cf4332af64caab04718bc853d3184 Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Fri, 1 May 2015 22:19:43 -0700 Subject: tc: remove unused redirect ttl improves ingress+u32 performance from 22.4 Mpps to 22.9 Mpps Signed-off-by: Jamal Hadi Salim Signed-off-by: Alexei Starovoitov Acked-by: Florian Westphal Acked-by: Daniel Borkmann Acked-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- include/uapi/linux/pkt_cls.h | 2 ++ net/core/dev.c | 9 --------- 2 files changed, 2 insertions(+), 9 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index 6810ca43a80a..596ffa0c7084 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -65,11 +65,13 @@ bits 9,10,11: redirect counter - redirect TTL. 
Loop avoidance #define SET_TC_NCLS(v) ( TC_NCLS | (v & ~TC_NCLS)) #define CLR_TC_NCLS(v) ( v & ~TC_NCLS) +#ifndef __KERNEL__ #define S_TC_RTTL _TC_MAKE32(9) #define M_TC_RTTL _TC_MAKEMASK(3,S_TC_RTTL) #define G_TC_RTTL(x) _TC_GETVALUE(x,S_TC_RTTL,M_TC_RTTL) #define V_TC_RTTL(x) _TC_MAKEVALUE(x,S_TC_RTTL) #define SET_TC_RTTL(v,n) ((V_TC_RTTL(n)) | (v & ~M_TC_RTTL)) +#endif #define S_TC_AT _TC_MAKE32(12) #define M_TC_AT _TC_MAKEMASK(2,S_TC_AT) diff --git a/net/core/dev.c b/net/core/dev.c index 74a5b62f7568..862875ec8f2f 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3531,18 +3531,9 @@ EXPORT_SYMBOL_GPL(br_fdb_test_addr_hook); */ static int ing_filter(struct sk_buff *skb, struct netdev_queue *rxq) { - struct net_device *dev = skb->dev; - u32 ttl = G_TC_RTTL(skb->tc_verd); int result = TC_ACT_OK; struct Qdisc *q; - if (unlikely(MAX_RED_LOOP < ttl++)) { - net_warn_ratelimited("Redir loop detected Dropping packet (%d->%d)\n", - skb->skb_iif, dev->ifindex); - return TC_ACT_SHOT; - } - - skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl); skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS); q = rcu_dereference(rxq->qdisc); -- cgit v1.2.3 From 9dc6c806b3c4812619e305685b3c86835bf784ab Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 17 Apr 2015 22:37:21 +0200 Subject: nbd: stop using req->cmd Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/block/nbd.c | 48 +++++++++++++++++++++++------------------------- include/uapi/linux/nbd.h | 2 -- 2 files changed, 23 insertions(+), 27 deletions(-) (limited to 'include/uapi/linux') diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 9cf52ac328fe..83a7ba4a3eec 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -230,29 +230,40 @@ static int nbd_send_req(struct nbd_device *nbd, struct request *req) int result, flags; struct nbd_request request; unsigned long size = blk_rq_bytes(req); + u32 type; + + if (req->cmd_type == REQ_TYPE_DRV_PRIV) + type = NBD_CMD_DISC; + else if (req->cmd_flags & REQ_DISCARD) + type = NBD_CMD_TRIM; + else if (req->cmd_flags & REQ_FLUSH) + type = NBD_CMD_FLUSH; + else if (rq_data_dir(req) == WRITE) + type = NBD_CMD_WRITE; + else + type = NBD_CMD_READ; memset(&request, 0, sizeof(request)); request.magic = htonl(NBD_REQUEST_MAGIC); - request.type = htonl(nbd_cmd(req)); - - if (nbd_cmd(req) != NBD_CMD_FLUSH && nbd_cmd(req) != NBD_CMD_DISC) { + request.type = htonl(type); + if (type != NBD_CMD_FLUSH && type != NBD_CMD_DISC) { request.from = cpu_to_be64((u64)blk_rq_pos(req) << 9); request.len = htonl(size); } memcpy(request.handle, &req, sizeof(req)); dev_dbg(nbd_to_dev(nbd), "request %p: sending control (%s@%llu,%uB)\n", - req, nbdcmd_to_ascii(nbd_cmd(req)), + req, nbdcmd_to_ascii(type), (unsigned long long)blk_rq_pos(req) << 9, blk_rq_bytes(req)); result = sock_xmit(nbd, 1, &request, sizeof(request), - (nbd_cmd(req) == NBD_CMD_WRITE) ? MSG_MORE : 0); + (type == NBD_CMD_WRITE) ? 
MSG_MORE : 0); if (result <= 0) { dev_err(disk_to_dev(nbd->disk), "Send control failed (result %d)\n", result); return -EIO; } - if (nbd_cmd(req) == NBD_CMD_WRITE) { + if (type == NBD_CMD_WRITE) { struct req_iterator iter; struct bio_vec bvec; /* @@ -352,7 +363,7 @@ static struct request *nbd_read_stat(struct nbd_device *nbd) } dev_dbg(nbd_to_dev(nbd), "request %p: got reply\n", req); - if (nbd_cmd(req) == NBD_CMD_READ) { + if (rq_data_dir(req) != WRITE) { struct req_iterator iter; struct bio_vec bvec; @@ -452,23 +463,11 @@ static void nbd_handle_req(struct nbd_device *nbd, struct request *req) if (req->cmd_type != REQ_TYPE_FS) goto error_out; - nbd_cmd(req) = NBD_CMD_READ; - if (rq_data_dir(req) == WRITE) { - if ((req->cmd_flags & REQ_DISCARD)) { - WARN_ON(!(nbd->flags & NBD_FLAG_SEND_TRIM)); - nbd_cmd(req) = NBD_CMD_TRIM; - } else - nbd_cmd(req) = NBD_CMD_WRITE; - if (nbd->flags & NBD_FLAG_READ_ONLY) { - dev_err(disk_to_dev(nbd->disk), - "Write on read-only\n"); - goto error_out; - } - } - - if (req->cmd_flags & REQ_FLUSH) { - BUG_ON(unlikely(blk_rq_sectors(req))); - nbd_cmd(req) = NBD_CMD_FLUSH; + if (rq_data_dir(req) == WRITE && + (nbd->flags & NBD_FLAG_READ_ONLY)) { + dev_err(disk_to_dev(nbd->disk), + "Write on read-only\n"); + goto error_out; } req->errors = 0; @@ -593,7 +592,6 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd, mutex_lock(&nbd->tx_lock); blk_rq_init(NULL, &sreq); sreq.cmd_type = REQ_TYPE_DRV_PRIV; - nbd_cmd(&sreq) = NBD_CMD_DISC; /* Check again after getting mutex back. */ if (!nbd->sock) diff --git a/include/uapi/linux/nbd.h b/include/uapi/linux/nbd.h index 4f52549b23ff..e08e413d5f71 100644 --- a/include/uapi/linux/nbd.h +++ b/include/uapi/linux/nbd.h @@ -44,8 +44,6 @@ enum { /* there is a gap here to match userspace */ #define NBD_FLAG_SEND_TRIM (1 << 5) /* send trim/discard */ -#define nbd_cmd(req) ((req)->cmd[0]) - /* userspace doesn't need the nbd_device structure */ /* These are sent over the network in the request/reply magic fields */ -- cgit v1.2.3 From cd8ae85299d54155702a56811b2e035e63064d3d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 3 May 2015 21:34:46 -0700 Subject: tcp: provide SYN headers for passive connections This patch allows a server application to get the TCP SYN headers for its passive connections. This is useful if the server is doing fingerprinting of clients based on SYN packet contents. Two socket options are added: TCP_SAVE_SYN and TCP_SAVED_SYN. The first is used on a socket to enable saving the SYN headers for child connections. This can be set before or after the listen() call. The latter is used to retrieve the SYN headers for passive connections, if the parent listener has enabled TCP_SAVE_SYN. TCP_SAVED_SYN is read once, it frees the saved SYN headers. The data returned in TCP_SAVED_SYN are network (IPv4/IPv6) and TCP headers. Original patch was written by Tom Herbert, I changed it to not hold a full skb (and associated dst and conntracking reference). We have used such patch for about 3 years at Google. Signed-off-by: Eric Dumazet Acked-by: Neal Cardwell Tested-by: Neal Cardwell Signed-off-by: David S. 
Miller --- include/linux/tcp.h | 8 ++++++++ include/net/request_sock.h | 4 +++- include/uapi/linux/tcp.h | 2 ++ net/ipv4/tcp.c | 35 +++++++++++++++++++++++++++++++++++ net/ipv4/tcp_input.c | 18 ++++++++++++++++++ net/ipv4/tcp_ipv4.c | 1 + net/ipv4/tcp_minisocks.c | 3 +++ 7 files changed, 70 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 3b2911502a8c..e6fb5df22db1 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -199,6 +199,7 @@ struct tcp_sock { syn_fastopen:1, /* SYN includes Fast Open option */ syn_fastopen_exp:1,/* SYN includes Fast Open exp. option */ syn_data_acked:1,/* data in SYN is acked by SYN-ACK */ + save_syn:1, /* Save headers of SYN packet */ is_cwnd_limited:1;/* forward progress limited by snd_cwnd? */ u32 tlp_high_seq; /* snd_nxt at the time of TLP retransmit. */ @@ -326,6 +327,7 @@ struct tcp_sock { * socket. Used to retransmit SYNACKs etc. */ struct request_sock *fastopen_rsk; + u32 *saved_syn; }; enum tsq_flags { @@ -393,4 +395,10 @@ static inline int fastopen_init_queue(struct sock *sk, int backlog) return 0; } +static inline void tcp_saved_syn_free(struct tcp_sock *tp) +{ + kfree(tp->saved_syn); + tp->saved_syn = NULL; +} + #endif /* _LINUX_TCP_H */ diff --git a/include/net/request_sock.h b/include/net/request_sock.h index 9f4265ce8892..87935cad2f7b 100644 --- a/include/net/request_sock.h +++ b/include/net/request_sock.h @@ -64,6 +64,7 @@ struct request_sock { struct timer_list rsk_timer; const struct request_sock_ops *rsk_ops; struct sock *sk; + u32 *saved_syn; u32 secid; u32 peer_secid; }; @@ -77,7 +78,7 @@ reqsk_alloc(const struct request_sock_ops *ops, struct sock *sk_listener) req->rsk_ops = ops; sock_hold(sk_listener); req->rsk_listener = sk_listener; - + req->saved_syn = NULL; /* Following is temporary. 
It is coupled with debugging * helpers in reqsk_put() & reqsk_free() */ @@ -104,6 +105,7 @@ static inline void reqsk_free(struct request_sock *req) req->rsk_ops->destructor(req); if (req->rsk_listener) sock_put(req->rsk_listener); + kfree(req->saved_syn); kmem_cache_free(req->rsk_ops->slab, req); } diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h index faa72f4fa547..51ebedba577f 100644 --- a/include/uapi/linux/tcp.h +++ b/include/uapi/linux/tcp.h @@ -113,6 +113,8 @@ enum { #define TCP_TIMESTAMP 24 #define TCP_NOTSENT_LOWAT 25 /* limit number of unsent bytes in write queue */ #define TCP_CC_INFO 26 /* Get Congestion Control (optional) info */ +#define TCP_SAVE_SYN 27 /* Record SYN headers for new connections */ +#define TCP_SAVED_SYN 28 /* Get SYN headers recorded for connection */ struct tcp_repair_opt { __u32 opt_code; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 46efa03d2b11..ecccfdc50d76 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2482,6 +2482,13 @@ static int do_tcp_setsockopt(struct sock *sk, int level, icsk->icsk_syn_retries = val; break; + case TCP_SAVE_SYN: + if (val < 0 || val > 1) + err = -EINVAL; + else + tp->save_syn = val; + break; + case TCP_LINGER2: if (val < 0) tp->linger2 = -1; @@ -2818,6 +2825,34 @@ static int do_tcp_getsockopt(struct sock *sk, int level, case TCP_NOTSENT_LOWAT: val = tp->notsent_lowat; break; + case TCP_SAVE_SYN: + val = tp->save_syn; + break; + case TCP_SAVED_SYN: { + if (get_user(len, optlen)) + return -EFAULT; + + lock_sock(sk); + if (tp->saved_syn) { + len = min_t(unsigned int, tp->saved_syn[0], len); + if (put_user(len, optlen)) { + release_sock(sk); + return -EFAULT; + } + if (copy_to_user(optval, tp->saved_syn + 1, len)) { + release_sock(sk); + return -EFAULT; + } + tcp_saved_syn_free(tp); + release_sock(sk); + } else { + release_sock(sk); + len = 0; + if (put_user(len, optlen)) + return -EFAULT; + } + return 0; + } default: return -ENOPROTOOPT; } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 09bdc4abfcbb..df2ca615cd0c 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -6060,6 +6060,23 @@ static bool tcp_syn_flood_action(struct sock *sk, return want_cookie; } +static void tcp_reqsk_record_syn(const struct sock *sk, + struct request_sock *req, + const struct sk_buff *skb) +{ + if (tcp_sk(sk)->save_syn) { + u32 len = skb_network_header_len(skb) + tcp_hdrlen(skb); + u32 *copy; + + copy = kmalloc(len + sizeof(u32), GFP_ATOMIC); + if (copy) { + copy[0] = len; + memcpy(©[1], skb_network_header(skb), len); + req->saved_syn = copy; + } + } +} + int tcp_conn_request(struct request_sock_ops *rsk_ops, const struct tcp_request_sock_ops *af_ops, struct sock *sk, struct sk_buff *skb) @@ -6192,6 +6209,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, tcp_rsk(req)->tfo_listener = false; af_ops->queue_hash_add(sk, req, TCP_TIMEOUT_INIT); } + tcp_reqsk_record_syn(sk, req, skb); return 0; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index fc1c658ec6c1..91cb4768a860 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1802,6 +1802,7 @@ void tcp_v4_destroy_sock(struct sock *sk) /* If socket is aborted during connect operation */ tcp_free_fastopen_req(tp); + tcp_saved_syn_free(tp); sk_sockets_allocated_dec(sk); sock_release_memcg(sk); diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index e5d7649136fc..ebe2ab2596ed 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -536,6 +536,9 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct 
request_sock *req, newtp->fastopen_rsk = NULL; newtp->syn_data_acked = 0; + newtp->saved_syn = req->saved_syn; + req->saved_syn = NULL; + TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_PASSIVEOPENS); } return newsk; -- cgit v1.2.3 From a2f11835994ed5bcd6d66c7205947cc482231b08 Mon Sep 17 00:00:00 2001 From: Shawn Landden Date: Tue, 5 May 2015 09:07:16 -0700 Subject: can.h: make padding given by gcc explicit The current definition of struct can_frame has a 16-byte size, with 8-byte alignment, but the 3 bytes of padding are not explicit like the similar 2 bytes of padding of struct canfd_frame. Make it explicit so it is easier to read. Signed-off-by: Shawn Landden Acked-by: Oliver Hartkopp Signed-off-by: Marc Kleine-Budde --- Documentation/networking/can.txt | 3 +++ include/uapi/linux/can.h | 6 ++++++ 2 files changed, 9 insertions(+) (limited to 'include/uapi/linux') diff --git a/Documentation/networking/can.txt b/Documentation/networking/can.txt index 5abad1e921ca..b48d4a149411 100644 --- a/Documentation/networking/can.txt +++ b/Documentation/networking/can.txt @@ -268,6 +268,9 @@ solution for a couple of reasons: struct can_frame { canid_t can_id; /* 32 bit CAN_ID + EFF/RTR/ERR flags */ __u8 can_dlc; /* frame payload length in byte (0 .. 8) */ + __u8 __pad; /* padding */ + __u8 __res0; /* reserved / padding */ + __u8 __res1; /* reserved / padding */ __u8 data[8] __attribute__((aligned(8))); }; diff --git a/include/uapi/linux/can.h b/include/uapi/linux/can.h index 41892f720057..9692cda5f8fc 100644 --- a/include/uapi/linux/can.h +++ b/include/uapi/linux/can.h @@ -95,11 +95,17 @@ typedef __u32 can_err_mask_t; * @can_dlc: frame payload length in byte (0 .. 8) aka data length code * N.B. the DLC field from ISO 11898-1 Chapter 8.4.2.3 has a 1:1 * mapping of the 'data length code' to the real payload length + * @__pad: padding + * @__res0: reserved / padding + * @__res1: reserved / padding * @data: CAN frame payload (up to 8 byte) */ struct can_frame { canid_t can_id; /* 32 bit CAN_ID + EFF/RTR/ERR flags */ __u8 can_dlc; /* frame payload length in byte (0 .. CAN_MAX_DLEN) */ + __u8 __pad; /* padding */ + __u8 __res0; /* reserved / padding */ + __u8 __res1; /* reserved / padding */ __u8 data[CAN_MAX_DLEN] __attribute__((aligned(8))); }; -- cgit v1.2.3 From 06f207fc541862ba8902ceda0ddeade6ea6bce72 Mon Sep 17 00:00:00 2001 From: Arik Nemtsov Date: Wed, 6 May 2015 16:28:31 +0300 Subject: cfg80211: change GO_CONCURRENT to IR_CONCURRENT for STA The GO_CONCURRENT regulatory definition can be extended to station interfaces requesting to IR as part of TDLS off-channel operations. Rename the GO_CONCURRENT flag to IR_CONCURRENT and allow the added use-case. Change internal users of GO_CONCURRENT to use the new definition. 
Signed-off-by: Arik Nemtsov Reviewed-by: Johannes Berg Signed-off-by: Johannes Berg --- drivers/net/wireless/iwlwifi/iwl-nvm-parse.c | 2 +- include/net/cfg80211.h | 4 +-- include/uapi/linux/nl80211.h | 28 +++++++++++--------- net/wireless/chan.c | 38 ++++++++++++++++------------ net/wireless/nl80211.c | 4 +-- net/wireless/reg.c | 4 +-- 6 files changed, 45 insertions(+), 35 deletions(-) (limited to 'include/uapi/linux') diff --git a/drivers/net/wireless/iwlwifi/iwl-nvm-parse.c b/drivers/net/wireless/iwlwifi/iwl-nvm-parse.c index 83903a5025c2..0b5a81d52a3e 100644 --- a/drivers/net/wireless/iwlwifi/iwl-nvm-parse.c +++ b/drivers/net/wireless/iwlwifi/iwl-nvm-parse.c @@ -248,7 +248,7 @@ static u32 iwl_get_channel_flags(u8 ch_num, int ch_idx, bool is_5ghz, */ if ((nvm_flags & NVM_CHANNEL_GO_CONCURRENT) && (flags & IEEE80211_CHAN_NO_IR)) - flags |= IEEE80211_CHAN_GO_CONCURRENT; + flags |= IEEE80211_CHAN_IR_CONCURRENT; return flags; } diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index f8d6813cd5b2..d63ecec73090 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -111,7 +111,7 @@ enum ieee80211_band { * This may be due to the driver or due to regulatory bandwidth * restrictions. * @IEEE80211_CHAN_INDOOR_ONLY: see %NL80211_FREQUENCY_ATTR_INDOOR_ONLY - * @IEEE80211_CHAN_GO_CONCURRENT: see %NL80211_FREQUENCY_ATTR_GO_CONCURRENT + * @IEEE80211_CHAN_IR_CONCURRENT: see %NL80211_FREQUENCY_ATTR_IR_CONCURRENT * @IEEE80211_CHAN_NO_20MHZ: 20 MHz bandwidth is not permitted * on this channel. * @IEEE80211_CHAN_NO_10MHZ: 10 MHz bandwidth is not permitted @@ -129,7 +129,7 @@ enum ieee80211_channel_flags { IEEE80211_CHAN_NO_80MHZ = 1<<7, IEEE80211_CHAN_NO_160MHZ = 1<<8, IEEE80211_CHAN_INDOOR_ONLY = 1<<9, - IEEE80211_CHAN_GO_CONCURRENT = 1<<10, + IEEE80211_CHAN_IR_CONCURRENT = 1<<10, IEEE80211_CHAN_NO_20MHZ = 1<<11, IEEE80211_CHAN_NO_10MHZ = 1<<12, }; diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 241220c43e86..c0ab6b0a3919 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -2620,16 +2620,17 @@ enum nl80211_band_attr { * an indoor surroundings, i.e., it is connected to AC power (and not * through portable DC inverters) or is under the control of a master * that is acting as an AP and is connected to AC power. - * @NL80211_FREQUENCY_ATTR_GO_CONCURRENT: GO operation is allowed on this + * @NL80211_FREQUENCY_ATTR_IR_CONCURRENT: IR operation is allowed on this * channel if it's connected concurrently to a BSS on the same channel on * the 2 GHz band or to a channel in the same UNII band (on the 5 GHz - * band), and IEEE80211_CHAN_RADAR is not set. Instantiating a GO on a - * channel that has the GO_CONCURRENT attribute set can be done when there - * is a clear assessment that the device is operating under the guidance of - * an authorized master, i.e., setting up a GO while the device is also - * connected to an AP with DFS and radar detection on the UNII band (it is - * up to user-space, i.e., wpa_supplicant to perform the required - * verifications) + * band), and IEEE80211_CHAN_RADAR is not set. 
Instantiating a GO or TDLS + * off-channel on a channel that has the IR_CONCURRENT attribute set can be + * done when there is a clear assessment that the device is operating under + * the guidance of an authorized master, i.e., setting up a GO or TDLS + * off-channel while the device is also connected to an AP with DFS and + * radar detection on the UNII band (it is up to user-space, i.e., + * wpa_supplicant to perform the required verifications). Using this + * attribute for IR is disallowed for master interfaces (IBSS, AP). * @NL80211_FREQUENCY_ATTR_NO_20MHZ: 20 MHz operation is not allowed * on this channel in current regulatory domain. * @NL80211_FREQUENCY_ATTR_NO_10MHZ: 10 MHz operation is not allowed @@ -2641,7 +2642,7 @@ enum nl80211_band_attr { * See https://apps.fcc.gov/eas/comments/GetPublishedDocument.html?id=327&tn=528122 * for more information on the FCC description of the relaxations allowed * by NL80211_FREQUENCY_ATTR_INDOOR_ONLY and - * NL80211_FREQUENCY_ATTR_GO_CONCURRENT. + * NL80211_FREQUENCY_ATTR_IR_CONCURRENT. */ enum nl80211_frequency_attr { __NL80211_FREQUENCY_ATTR_INVALID, @@ -2659,7 +2660,7 @@ enum nl80211_frequency_attr { NL80211_FREQUENCY_ATTR_NO_160MHZ, NL80211_FREQUENCY_ATTR_DFS_CAC_TIME, NL80211_FREQUENCY_ATTR_INDOOR_ONLY, - NL80211_FREQUENCY_ATTR_GO_CONCURRENT, + NL80211_FREQUENCY_ATTR_IR_CONCURRENT, NL80211_FREQUENCY_ATTR_NO_20MHZ, NL80211_FREQUENCY_ATTR_NO_10MHZ, @@ -2672,6 +2673,8 @@ enum nl80211_frequency_attr { #define NL80211_FREQUENCY_ATTR_PASSIVE_SCAN NL80211_FREQUENCY_ATTR_NO_IR #define NL80211_FREQUENCY_ATTR_NO_IBSS NL80211_FREQUENCY_ATTR_NO_IR #define NL80211_FREQUENCY_ATTR_NO_IR NL80211_FREQUENCY_ATTR_NO_IR +#define NL80211_FREQUENCY_ATTR_GO_CONCURRENT \ + NL80211_FREQUENCY_ATTR_IR_CONCURRENT /** * enum nl80211_bitrate_attr - bitrate attributes @@ -2830,7 +2833,7 @@ enum nl80211_sched_scan_match_attr { * @NL80211_RRF_AUTO_BW: maximum available bandwidth should be calculated * base on contiguous rules and wider channels will be allowed to cross * multiple contiguous/overlapping frequency ranges. 
- * @NL80211_RRF_GO_CONCURRENT: See &NL80211_FREQUENCY_ATTR_GO_CONCURRENT + * @NL80211_RRF_IR_CONCURRENT: See &NL80211_FREQUENCY_ATTR_IR_CONCURRENT * @NL80211_RRF_NO_HT40MINUS: channels can't be used in HT40- operation * @NL80211_RRF_NO_HT40PLUS: channels can't be used in HT40+ operation * @NL80211_RRF_NO_80MHZ: 80MHz operation not allowed @@ -2847,7 +2850,7 @@ enum nl80211_reg_rule_flags { NL80211_RRF_NO_IR = 1<<7, __NL80211_RRF_NO_IBSS = 1<<8, NL80211_RRF_AUTO_BW = 1<<11, - NL80211_RRF_GO_CONCURRENT = 1<<12, + NL80211_RRF_IR_CONCURRENT = 1<<12, NL80211_RRF_NO_HT40MINUS = 1<<13, NL80211_RRF_NO_HT40PLUS = 1<<14, NL80211_RRF_NO_80MHZ = 1<<15, @@ -2859,6 +2862,7 @@ enum nl80211_reg_rule_flags { #define NL80211_RRF_NO_IR NL80211_RRF_NO_IR #define NL80211_RRF_NO_HT40 (NL80211_RRF_NO_HT40MINUS |\ NL80211_RRF_NO_HT40PLUS) +#define NL80211_RRF_GO_CONCURRENT NL80211_RRF_IR_CONCURRENT /* For backport compatibility with older userspace */ #define NL80211_RRF_NO_IR_ALL (NL80211_RRF_NO_IR | __NL80211_RRF_NO_IBSS) diff --git a/net/wireless/chan.c b/net/wireless/chan.c index 5bcffdbf3e88..915b328b9ac5 100644 --- a/net/wireless/chan.c +++ b/net/wireless/chan.c @@ -698,19 +698,20 @@ bool cfg80211_chandef_usable(struct wiphy *wiphy, EXPORT_SYMBOL(cfg80211_chandef_usable); /* - * For GO only, check if the channel can be used under permissive conditions - * mandated by the some regulatory bodies, i.e., the channel is marked with - * IEEE80211_CHAN_GO_CONCURRENT and there is an additional station interface + * Check if the channel can be used under permissive conditions mandated by + * some regulatory bodies, i.e., the channel is marked with + * IEEE80211_CHAN_IR_CONCURRENT and there is an additional station interface * associated to an AP on the same channel or on the same UNII band * (assuming that the AP is an authorized master). - * In addition allow the GO to operate on a channel on which indoor operation is + * In addition allow operation on a channel on which indoor operation is * allowed, iff we are currently operating in an indoor environment. */ -static bool cfg80211_go_permissive_chan(struct cfg80211_registered_device *rdev, +static bool cfg80211_ir_permissive_chan(struct wiphy *wiphy, + enum nl80211_iftype iftype, struct ieee80211_channel *chan) { struct wireless_dev *wdev; - struct wiphy *wiphy = wiphy_idx_to_wiphy(rdev->wiphy_idx); + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); ASSERT_RTNL(); @@ -718,16 +719,22 @@ static bool cfg80211_go_permissive_chan(struct cfg80211_registered_device *rdev, !(wiphy->regulatory_flags & REGULATORY_ENABLE_RELAX_NO_IR)) return false; + /* only valid for GO and TDLS off-channel (station/p2p-CL) */ + if (iftype != NL80211_IFTYPE_P2P_GO && + iftype != NL80211_IFTYPE_STATION && + iftype != NL80211_IFTYPE_P2P_CLIENT) + return false; + if (regulatory_indoor_allowed() && (chan->flags & IEEE80211_CHAN_INDOOR_ONLY)) return true; - if (!(chan->flags & IEEE80211_CHAN_GO_CONCURRENT)) + if (!(chan->flags & IEEE80211_CHAN_IR_CONCURRENT)) return false; /* * Generally, it is possible to rely on another device/driver to allow - * the GO concurrent relaxation, however, since the device can further + * the IR concurrent relaxation, however, since the device can further * enforce the relaxation (by doing a similar verifications as this), * and thus fail the GO instantiation, consider only the interfaces of * the current registered device. 
@@ -748,7 +755,8 @@ static bool cfg80211_go_permissive_chan(struct cfg80211_registered_device *rdev, * GO_CONCURRENT is disconnected now. But then we must make sure * we're not outdoor on an indoor-only channel. */ - if (wdev->iftype == NL80211_IFTYPE_P2P_GO && + if (iftype == NL80211_IFTYPE_P2P_GO && + wdev->iftype == NL80211_IFTYPE_P2P_GO && wdev->beacon_interval && !(chan->flags & IEEE80211_CHAN_INDOOR_ONLY)) other_chan = wdev->chandef.chan; @@ -793,7 +801,6 @@ bool cfg80211_reg_can_beacon(struct wiphy *wiphy, struct cfg80211_chan_def *chandef, enum nl80211_iftype iftype) { - struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); bool res; u32 prohibited_flags = IEEE80211_CHAN_DISABLED | IEEE80211_CHAN_RADAR; @@ -801,13 +808,12 @@ bool cfg80211_reg_can_beacon(struct wiphy *wiphy, trace_cfg80211_reg_can_beacon(wiphy, chandef, iftype); /* - * Under certain conditions suggested by the some regulatory bodies - * a GO can operate on channels marked with IEEE80211_NO_IR - * so set this flag only if such relaxations are not enabled and - * the conditions are not met. + * Under certain conditions suggested by some regulatory bodies a + * GO/STA can IR on channels marked with IEEE80211_NO_IR. Set this flag + * only if such relaxations are not enabled and the conditions are not + * met. */ - if (iftype != NL80211_IFTYPE_P2P_GO || - !cfg80211_go_permissive_chan(rdev, chandef->chan)) + if (!cfg80211_ir_permissive_chan(wiphy, iftype, chandef->chan)) prohibited_flags |= IEEE80211_CHAN_NO_IR; if (cfg80211_chandef_dfs_required(wiphy, chandef, iftype) > 0 && diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 8a33bbae9ec5..c264effd00a6 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -639,8 +639,8 @@ static int nl80211_msg_put_channel(struct sk_buff *msg, if ((chan->flags & IEEE80211_CHAN_INDOOR_ONLY) && nla_put_flag(msg, NL80211_FREQUENCY_ATTR_INDOOR_ONLY)) goto nla_put_failure; - if ((chan->flags & IEEE80211_CHAN_GO_CONCURRENT) && - nla_put_flag(msg, NL80211_FREQUENCY_ATTR_GO_CONCURRENT)) + if ((chan->flags & IEEE80211_CHAN_IR_CONCURRENT) && + nla_put_flag(msg, NL80211_FREQUENCY_ATTR_IR_CONCURRENT)) goto nla_put_failure; if ((chan->flags & IEEE80211_CHAN_NO_20MHZ) && nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_20MHZ)) diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 0e347f888fe9..d359e0610198 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -989,8 +989,8 @@ static u32 map_regdom_flags(u32 rd_flags) channel_flags |= IEEE80211_CHAN_NO_OFDM; if (rd_flags & NL80211_RRF_NO_OUTDOOR) channel_flags |= IEEE80211_CHAN_INDOOR_ONLY; - if (rd_flags & NL80211_RRF_GO_CONCURRENT) - channel_flags |= IEEE80211_CHAN_GO_CONCURRENT; + if (rd_flags & NL80211_RRF_IR_CONCURRENT) + channel_flags |= IEEE80211_CHAN_IR_CONCURRENT; if (rd_flags & NL80211_RRF_NO_HT40MINUS) channel_flags |= IEEE80211_CHAN_NO_HT40MINUS; if (rd_flags & NL80211_RRF_NO_HT40PLUS) -- cgit v1.2.3 From 90de4a1875180f8347c075319af2cce586c96ab6 Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Mon, 13 Apr 2015 01:53:41 +0300 Subject: KVM: x86: Support for disabling quirks Introducing KVM_CAP_DISABLE_QUIRKS for disabling x86 quirks that were previous created in order to overcome QEMU issues. Those issue were mostly result of invalid VM BIOS. Currently there are two quirks that can be disabled: 1. KVM_QUIRK_LINT0_REENABLED - LINT0 was enabled after boot 2. 
KVM_QUIRK_CD_NW_CLEARED - CD and NW are cleared after boot These two issues are already resolved in recent releases of QEMU, and would therefore be disabled by QEMU. Signed-off-by: Nadav Amit Message-Id: <1428879221-29996-1-git-send-email-namit@cs.technion.ac.il> [Report capability from KVM_CHECK_EXTENSION too. - Paolo] Signed-off-by: Paolo Bonzini --- Documentation/virtual/kvm/api.txt | 3 ++- arch/x86/include/asm/kvm_host.h | 2 ++ arch/x86/include/uapi/asm/kvm.h | 3 +++ arch/x86/kvm/lapic.c | 5 +++-- arch/x86/kvm/svm.c | 3 ++- arch/x86/kvm/x86.c | 30 ++++++++++++++++++++++++++++++ include/uapi/linux/kvm.h | 1 + 7 files changed, 43 insertions(+), 4 deletions(-) (limited to 'include/uapi/linux') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 9fa2bf8c3f6f..695544420ff2 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -959,7 +959,8 @@ documentation when it pops into existence). 4.37 KVM_ENABLE_CAP Capability: KVM_CAP_ENABLE_CAP, KVM_CAP_ENABLE_CAP_VM -Architectures: ppc, s390 +Architectures: x86 (only KVM_CAP_ENABLE_CAP_VM), + mips (only KVM_CAP_ENABLE_CAP), ppc, s390 Type: vcpu ioctl, vm ioctl (with KVM_CAP_ENABLE_CAP_VM) Parameters: struct kvm_enable_cap (in) Returns: 0 on success; -1 on error diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index dea2e7e962e3..f80ad591aa61 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -635,6 +635,8 @@ struct kvm_arch { #endif bool boot_vcpu_runs_old_kvmclock; + + u64 disabled_quirks; }; struct kvm_vm_stat { diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h index d7dcef58aefa..2fec75e4b1e1 100644 --- a/arch/x86/include/uapi/asm/kvm.h +++ b/arch/x86/include/uapi/asm/kvm.h @@ -345,4 +345,7 @@ struct kvm_xcrs { struct kvm_sync_regs { }; +#define KVM_QUIRK_LINT0_REENABLED (1 << 0) +#define KVM_QUIRK_CD_NW_CLEARED (1 << 1) + #endif /* _ASM_X86_KVM_H */ diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 629af0f1c5c4..4071eb161c8f 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -1577,8 +1577,9 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu) for (i = 0; i < APIC_LVT_NUM; i++) apic_set_reg(apic, APIC_LVTT + 0x10 * i, APIC_LVT_MASKED); apic->lapic_timer.timer_mode = 0; - apic_set_reg(apic, APIC_LVT0, - SET_APIC_DELIVERY_MODE(0, APIC_MODE_EXTINT)); + if (!(vcpu->kvm->arch.disabled_quirks & KVM_QUIRK_LINT0_REENABLED)) + apic_set_reg(apic, APIC_LVT0, + SET_APIC_DELIVERY_MODE(0, APIC_MODE_EXTINT)); apic_set_reg(apic, APIC_DFR, 0xffffffffU); apic_set_spiv(apic, 0xff); diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index ce741b8650f6..46299dac7c6d 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1575,7 +1575,8 @@ static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) * does not do it - this results in some delay at * reboot */ - cr0 &= ~(X86_CR0_CD | X86_CR0_NW); + if (!(vcpu->kvm->arch.disabled_quirks & KVM_QUIRK_CD_NW_CLEARED)) + cr0 &= ~(X86_CR0_CD | X86_CR0_NW); svm->vmcb->save.cr0 = cr0; mark_dirty(svm->vmcb, VMCB_CR); update_cr0_intercept(svm); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 7a959be0aebc..0435b653f583 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2800,6 +2800,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_HYPERV_TIME: case KVM_CAP_IOAPIC_POLARITY_IGNORED: case KVM_CAP_TSC_DEADLINE_TIMER: + case KVM_CAP_ENABLE_CAP_VM: + case KVM_CAP_DISABLE_QUIRKS: #ifdef 
CONFIG_KVM_DEVICE_ASSIGNMENT case KVM_CAP_ASSIGN_DEV_IRQ: case KVM_CAP_PCI_2_3: @@ -3847,6 +3849,26 @@ int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event, return 0; } +static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, + struct kvm_enable_cap *cap) +{ + int r; + + if (cap->flags) + return -EINVAL; + + switch (cap->cap) { + case KVM_CAP_DISABLE_QUIRKS: + kvm->arch.disabled_quirks = cap->args[0]; + r = 0; + break; + default: + r = -EINVAL; + break; + } + return r; +} + long kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { @@ -4099,7 +4121,15 @@ long kvm_arch_vm_ioctl(struct file *filp, r = 0; break; } + case KVM_ENABLE_CAP: { + struct kvm_enable_cap cap; + r = -EFAULT; + if (copy_from_user(&cap, argp, sizeof(cap))) + goto out; + r = kvm_vm_ioctl_enable_cap(kvm, &cap); + break; + } default: r = kvm_vm_ioctl_assigned_device(kvm, ioctl, arg); } diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 4b60056776d1..75bd9f7fd846 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -814,6 +814,7 @@ struct kvm_ppc_smmu_info { #define KVM_CAP_S390_INJECT_IRQ 113 #define KVM_CAP_S390_IRQ_STATE 114 #define KVM_CAP_PPC_HWRNG 115 +#define KVM_CAP_DISABLE_QUIRKS 116 #ifdef KVM_CAP_IRQ_ROUTING -- cgit v1.2.3 From e520af48c7e5acae5f17f82a79ba7ab7cf156f3b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 6 May 2015 14:26:25 -0700 Subject: tcp: add TCPWinProbe and TCPKeepAlive SNMP counters Diagnosing problems related to Window Probes has been hard because we lack a counter. TCPWinProbe counts the number of ACK packets a sender has to send at regular intervals to make sure a reverse ACK packet opening back a window had not been lost. TCPKeepAlive counts the number of ACK packets sent to keep TCP flows alive (SO_KEEPALIVE) Signed-off-by: Eric Dumazet Signed-off-by: Yuchung Cheng Acked-by: Neal Cardwell Acked-by: Nandita Dukkipati Signed-off-by: David S. 
Miller --- include/net/tcp.h | 2 +- include/uapi/linux/snmp.h | 2 ++ net/ipv4/proc.c | 2 ++ net/ipv4/tcp_output.c | 13 +++++++------ net/ipv4/tcp_timer.c | 2 +- 5 files changed, 13 insertions(+), 8 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/net/tcp.h b/include/net/tcp.h index 7a2248a35b13..b8ea12880fd9 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -527,7 +527,7 @@ int tcp_fragment(struct sock *, struct sk_buff *, u32, unsigned int, gfp_t); void tcp_send_probe0(struct sock *); void tcp_send_partial(struct sock *); -int tcp_write_wakeup(struct sock *); +int tcp_write_wakeup(struct sock *, int mib); void tcp_send_fin(struct sock *sk); void tcp_send_active_reset(struct sock *sk, gfp_t priority); int tcp_send_synack(struct sock *); diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h index 6a6fb747c78d..eee8968407f0 100644 --- a/include/uapi/linux/snmp.h +++ b/include/uapi/linux/snmp.h @@ -276,6 +276,8 @@ enum LINUX_MIB_TCPACKSKIPPEDFINWAIT2, /* TCPACKSkippedFinWait2 */ LINUX_MIB_TCPACKSKIPPEDTIMEWAIT, /* TCPACKSkippedTimeWait */ LINUX_MIB_TCPACKSKIPPEDCHALLENGE, /* TCPACKSkippedChallenge */ + LINUX_MIB_TCPWINPROBE, /* TCPWinProbe */ + LINUX_MIB_TCPKEEPALIVE, /* TCPKeepAlive */ __LINUX_MIB_MAX }; diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index e1f3b911dd1e..da5d483e236a 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -298,6 +298,8 @@ static const struct snmp_mib snmp4_net_list[] = { SNMP_MIB_ITEM("TCPACKSkippedFinWait2", LINUX_MIB_TCPACKSKIPPEDFINWAIT2), SNMP_MIB_ITEM("TCPACKSkippedTimeWait", LINUX_MIB_TCPACKSKIPPEDTIMEWAIT), SNMP_MIB_ITEM("TCPACKSkippedChallenge", LINUX_MIB_TCPACKSKIPPEDCHALLENGE), + SNMP_MIB_ITEM("TCPWinProbe", LINUX_MIB_TCPWINPROBE), + SNMP_MIB_ITEM("TCPKeepAlive", LINUX_MIB_TCPKEEPALIVE), SNMP_MIB_SENTINEL }; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index b76c719e1979..7386d32cd670 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -3382,7 +3382,7 @@ EXPORT_SYMBOL_GPL(tcp_send_ack); * one is with SEG.SEQ=SND.UNA to deliver urgent pointer, another is * out-of-date with SND.UNA-1 to probe window. */ -static int tcp_xmit_probe_skb(struct sock *sk, int urgent) +static int tcp_xmit_probe_skb(struct sock *sk, int urgent, int mib) { struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; @@ -3400,6 +3400,7 @@ static int tcp_xmit_probe_skb(struct sock *sk, int urgent) */ tcp_init_nondata_skb(skb, tp->snd_una - !urgent, TCPHDR_ACK); skb_mstamp_get(&skb->skb_mstamp); + NET_INC_STATS_BH(sock_net(sk), mib); return tcp_transmit_skb(sk, skb, 0, GFP_ATOMIC); } @@ -3407,12 +3408,12 @@ void tcp_send_window_probe(struct sock *sk) { if (sk->sk_state == TCP_ESTABLISHED) { tcp_sk(sk)->snd_wl1 = tcp_sk(sk)->rcv_nxt - 1; - tcp_xmit_probe_skb(sk, 0); + tcp_xmit_probe_skb(sk, 0, LINUX_MIB_TCPWINPROBE); } } /* Initiate keepalive or window probe from timer. 
*/ -int tcp_write_wakeup(struct sock *sk) +int tcp_write_wakeup(struct sock *sk, int mib) { struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; @@ -3449,8 +3450,8 @@ int tcp_write_wakeup(struct sock *sk) return err; } else { if (between(tp->snd_up, tp->snd_una + 1, tp->snd_una + 0xFFFF)) - tcp_xmit_probe_skb(sk, 1); - return tcp_xmit_probe_skb(sk, 0); + tcp_xmit_probe_skb(sk, 1, mib); + return tcp_xmit_probe_skb(sk, 0, mib); } } @@ -3464,7 +3465,7 @@ void tcp_send_probe0(struct sock *sk) unsigned long probe_max; int err; - err = tcp_write_wakeup(sk); + err = tcp_write_wakeup(sk, LINUX_MIB_TCPWINPROBE); if (tp->packets_out || !tcp_send_head(sk)) { /* Cancel probe timer, if it is not required. */ diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 8c65dc147d8b..65bf670e8714 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -616,7 +616,7 @@ static void tcp_keepalive_timer (unsigned long data) tcp_write_err(sk); goto out; } - if (tcp_write_wakeup(sk) <= 0) { + if (tcp_write_wakeup(sk, LINUX_MIB_TCPKEEPALIVE) <= 0) { icsk->icsk_probes_out++; elapsed = keepalive_intvl_when(tp); } else { -- cgit v1.2.3 From 59324cf35aba5336b611074028777838a963d03b Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Thu, 7 May 2015 11:02:53 +0200 Subject: netlink: allow to listen "all" netns More accurately, listen all netns that have a nsid assigned into the netns where the netlink socket is opened. For this purpose, a netlink socket option is added: NETLINK_LISTEN_ALL_NSID. When this option is set on a netlink socket, this socket will receive netlink notifications from all netns that have a nsid assigned into the netns where the socket has been opened. The nsid is sent to userland via an anscillary data. With this patch, a daemon needs only one socket to listen many netns. This is useful when the number of netns is high. Because 0 is a valid value for a nsid, the field nsid_is_set indicates if the field nsid is valid or not. skb->cb is initialized to 0 on skb allocation, thus we are sure that we will never send a nsid 0 by error to the userland. Signed-off-by: Nicolas Dichtel Acked-by: Thomas Graf Signed-off-by: David S. 
Miller --- include/linux/netlink.h | 2 ++ include/net/net_namespace.h | 2 ++ include/uapi/linux/netlink.h | 1 + net/core/net_namespace.c | 10 ++++++++- net/netlink/af_netlink.c | 52 +++++++++++++++++++++++++++++++++++++++----- 5 files changed, 61 insertions(+), 6 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/linux/netlink.h b/include/linux/netlink.h index 6835c1279df7..9120edb650a0 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -28,6 +28,8 @@ struct netlink_skb_parms { __u32 dst_group; __u32 flags; struct sock *sk; + bool nsid_is_set; + int nsid; }; #define NETLINK_CB(skb) (*(struct netlink_skb_parms*)&((skb)->cb)) diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 6d1e2eae32fb..3f850acc844e 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -272,6 +272,8 @@ static inline struct net *read_pnet(const possible_net_t *pnet) #endif int peernet2id_alloc(struct net *net, struct net *peer); +int peernet2id(struct net *net, struct net *peer); +bool peernet_has_id(struct net *net, struct net *peer); struct net *get_net_ns_by_id(struct net *net, int id); struct pernet_operations { diff --git a/include/uapi/linux/netlink.h b/include/uapi/linux/netlink.h index 1a85940f8ab7..3e34b7d702f8 100644 --- a/include/uapi/linux/netlink.h +++ b/include/uapi/linux/netlink.h @@ -108,6 +108,7 @@ struct nlmsgerr { #define NETLINK_NO_ENOBUFS 5 #define NETLINK_RX_RING 6 #define NETLINK_TX_RING 7 +#define NETLINK_LISTEN_ALL_NSID 8 struct nl_pktinfo { __u32 group; diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index ae5008b097de..a665bf490c88 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -229,7 +229,7 @@ int peernet2id_alloc(struct net *net, struct net *peer) EXPORT_SYMBOL(peernet2id_alloc); /* This function returns, if assigned, the id of a peer netns. */ -static int peernet2id(struct net *net, struct net *peer) +int peernet2id(struct net *net, struct net *peer) { unsigned long flags; int id; @@ -240,6 +240,14 @@ static int peernet2id(struct net *net, struct net *peer) return id; } +/* This function returns true is the peer netns has an id assigned into the + * current netns. 
+ */ +bool peernet_has_id(struct net *net, struct net *peer) +{ + return peernet2id(net, peer) >= 0; +} + struct net *get_net_ns_by_id(struct net *net, int id) { unsigned long flags; diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index bf7f56d7a9aa..a5fff75accf8 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -83,6 +83,7 @@ struct listeners { #define NETLINK_F_RECV_PKTINFO 0x2 #define NETLINK_F_BROADCAST_SEND_ERROR 0x4 #define NETLINK_F_RECV_NO_ENOBUFS 0x8 +#define NETLINK_F_LISTEN_ALL_NSID 0x10 static inline int netlink_is_kernel(struct sock *sk) { @@ -1932,8 +1933,17 @@ static void do_one_broadcast(struct sock *sk, !test_bit(p->group - 1, nlk->groups)) return; - if (!net_eq(sock_net(sk), p->net)) - return; + if (!net_eq(sock_net(sk), p->net)) { + if (!(nlk->flags & NETLINK_F_LISTEN_ALL_NSID)) + return; + + if (!peernet_has_id(sock_net(sk), p->net)) + return; + + if (!file_ns_capable(sk->sk_socket->file, p->net->user_ns, + CAP_NET_BROADCAST)) + return; + } if (p->failure) { netlink_overrun(sk); @@ -1959,13 +1969,22 @@ static void do_one_broadcast(struct sock *sk, p->failure = 1; if (nlk->flags & NETLINK_F_BROADCAST_SEND_ERROR) p->delivery_failure = 1; - } else if (p->tx_filter && p->tx_filter(sk, p->skb2, p->tx_data)) { + goto out; + } + if (p->tx_filter && p->tx_filter(sk, p->skb2, p->tx_data)) { kfree_skb(p->skb2); p->skb2 = NULL; - } else if (sk_filter(sk, p->skb2)) { + goto out; + } + if (sk_filter(sk, p->skb2)) { kfree_skb(p->skb2); p->skb2 = NULL; - } else if ((val = netlink_broadcast_deliver(sk, p->skb2)) < 0) { + goto out; + } + NETLINK_CB(p->skb2).nsid = peernet2id(sock_net(sk), p->net); + NETLINK_CB(p->skb2).nsid_is_set = true; + val = netlink_broadcast_deliver(sk, p->skb2); + if (val < 0) { netlink_overrun(sk); if (nlk->flags & NETLINK_F_BROADCAST_SEND_ERROR) p->delivery_failure = 1; @@ -1974,6 +1993,7 @@ static void do_one_broadcast(struct sock *sk, p->delivered = 1; p->skb2 = NULL; } +out: sock_put(sk); } @@ -2202,6 +2222,16 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname, break; } #endif /* CONFIG_NETLINK_MMAP */ + case NETLINK_LISTEN_ALL_NSID: + if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_BROADCAST)) + return -EPERM; + + if (val) + nlk->flags |= NETLINK_F_LISTEN_ALL_NSID; + else + nlk->flags &= ~NETLINK_F_LISTEN_ALL_NSID; + err = 0; + break; default: err = -ENOPROTOOPT; } @@ -2268,6 +2298,16 @@ static void netlink_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb) put_cmsg(msg, SOL_NETLINK, NETLINK_PKTINFO, sizeof(info), &info); } +static void netlink_cmsg_listen_all_nsid(struct sock *sk, struct msghdr *msg, + struct sk_buff *skb) +{ + if (!NETLINK_CB(skb).nsid_is_set) + return; + + put_cmsg(msg, SOL_NETLINK, NETLINK_LISTEN_ALL_NSID, sizeof(int), + &NETLINK_CB(skb).nsid); +} + static int netlink_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) { struct sock *sk = sock->sk; @@ -2421,6 +2461,8 @@ static int netlink_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, if (nlk->flags & NETLINK_F_RECV_PKTINFO) netlink_cmsg_recv_pktinfo(msg, skb); + if (nlk->flags & NETLINK_F_LISTEN_ALL_NSID) + netlink_cmsg_listen_all_nsid(sk, msg, skb); memset(&scm, 0, sizeof(scm)); scm.creds = *NETLINK_CREDS(skb); -- cgit v1.2.3 From 80ba92fa1a92dea128283f69f55b02242e213650 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 8 May 2015 15:05:12 -0700 Subject: codel: add ce_threshold attribute For DCTCP or similar ECN based deployments on fabrics with shallow buffers, hosts are responsible 
for a good part of the buffering.

This patch adds an optional ce_threshold to codel & fq_codel qdiscs, so that DCTCP can have feedback from queuing in the host.

A DCTCP enabled egress port simply have a queue occupancy threshold above which ECT packets get CE mark.

In codel language this translates to a sojourn time, so that one doesn't have to worry about bytes or bandwidth but delays.

This makes the host an active participant in the health of the whole network.

This also helps experimenting DCTCP in a setup without DCTCP compliant fabric.

On following example, ce_threshold is set to 1ms, and we can see from 'ldelay xxx us' that TCP is not trying to go around the 5ms codel target.

Queue has more capacity to absorb inelastic bursts (say from UDP traffic), as queues are maintained to an optimal level.

lpaa23:~# ./tc -s -d qd sh dev eth1
qdisc mq 1: dev eth1 root
 Sent 87910654696 bytes 58065331 pkt (dropped 0, overlimits 0 requeues 42961)
 backlog 3108242b 364p requeues 42961
qdisc codel 8063: dev eth1 parent 1:1 limit 1000p target 5.0ms ce_threshold 1.0ms interval 100.0ms
 Sent 7363778701 bytes 4863809 pkt (dropped 0, overlimits 0 requeues 5503)
 rate 2348Mbit 193919pps backlog 255866b 46p requeues 5503
  count 0 lastcount 0 ldelay 1.0ms drop_next 0us
  maxpacket 68130 ecn_mark 0 drop_overlimit 0 ce_mark 72384
qdisc codel 8064: dev eth1 parent 1:2 limit 1000p target 5.0ms ce_threshold 1.0ms interval 100.0ms
 Sent 7636486190 bytes 5043942 pkt (dropped 0, overlimits 0 requeues 5186)
 rate 2319Mbit 191538pps backlog 207418b 64p requeues 5186
  count 0 lastcount 0 ldelay 694us drop_next 0us
  maxpacket 68130 ecn_mark 0 drop_overlimit 0 ce_mark 69873
qdisc codel 8065: dev eth1 parent 1:3 limit 1000p target 5.0ms ce_threshold 1.0ms interval 100.0ms
 Sent 11569360142 bytes 7641602 pkt (dropped 0, overlimits 0 requeues 5554)
 rate 3041Mbit 251096pps backlog 210446b 59p requeues 5554
  count 0 lastcount 0 ldelay 889us drop_next 0us
  maxpacket 68130 ecn_mark 0 drop_overlimit 0 ce_mark 37780
...

Signed-off-by: Eric Dumazet
Cc: Florian Westphal
Cc: Daniel Borkmann
Cc: Glenn Judd
Cc: Nandita Dukkipati
Cc: Neal Cardwell
Cc: Yuchung Cheng
Acked-by: Neal Cardwell
Signed-off-by: David S. Miller
---
 include/net/codel.h | 12 +++++++++++-
 include/uapi/linux/pkt_sched.h | 4 ++++
 net/sched/sch_codel.c | 15 +++++++++++++--
 net/sched/sch_fq_codel.c | 15 ++++++++++++++-
 4 files changed, 42 insertions(+), 4 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/net/codel.h b/include/net/codel.h index aeee28081245..8c0f78f209e8 100644 --- a/include/net/codel.h +++ b/include/net/codel.h @@ -7,7 +7,7 @@ * Copyright (C) 2011-2012 Kathleen Nichols * Copyright (C) 2011-2012 Van Jacobson * Copyright (C) 2012 Michael D.
Taht - * Copyright (C) 2012 Eric Dumazet + * Copyright (C) 2012,2015 Eric Dumazet * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -119,11 +119,13 @@ static inline u32 codel_time_to_us(codel_time_t val) /** * struct codel_params - contains codel parameters * @target: target queue size (in time units) + * @ce_threshold: threshold for marking packets with ECN CE * @interval: width of moving time window * @ecn: is Explicit Congestion Notification enabled */ struct codel_params { codel_time_t target; + codel_time_t ce_threshold; codel_time_t interval; bool ecn; }; @@ -159,17 +161,22 @@ struct codel_vars { * @maxpacket: largest packet we've seen so far * @drop_count: temp count of dropped packets in dequeue() * ecn_mark: number of packets we ECN marked instead of dropping + * ce_mark: number of packets CE marked because sojourn time was above ce_threshold */ struct codel_stats { u32 maxpacket; u32 drop_count; u32 ecn_mark; + u32 ce_mark; }; +#define CODEL_DISABLED_THRESHOLD INT_MAX + static void codel_params_init(struct codel_params *params) { params->interval = MS2TIME(100); params->target = MS2TIME(5); + params->ce_threshold = CODEL_DISABLED_THRESHOLD; params->ecn = false; } @@ -350,6 +357,9 @@ static struct sk_buff *codel_dequeue(struct Qdisc *sch, vars->rec_inv_sqrt); } end: + if (skb && codel_time_after(vars->ldelay, params->ce_threshold) && + INET_ECN_set_ce(skb)) + stats->ce_mark++; return skb; } #endif diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h index 534b84710745..69d88b309cc7 100644 --- a/include/uapi/linux/pkt_sched.h +++ b/include/uapi/linux/pkt_sched.h @@ -679,6 +679,7 @@ enum { TCA_CODEL_LIMIT, TCA_CODEL_INTERVAL, TCA_CODEL_ECN, + TCA_CODEL_CE_THRESHOLD, __TCA_CODEL_MAX }; @@ -695,6 +696,7 @@ struct tc_codel_xstats { __u32 drop_overlimit; /* number of time max qdisc packet limit was hit */ __u32 ecn_mark; /* number of packets we ECN marked instead of dropped */ __u32 dropping; /* are we in dropping state ? */ + __u32 ce_mark; /* number of CE marked packets because of ce_threshold */ }; /* FQ_CODEL */ @@ -707,6 +709,7 @@ enum { TCA_FQ_CODEL_ECN, TCA_FQ_CODEL_FLOWS, TCA_FQ_CODEL_QUANTUM, + TCA_FQ_CODEL_CE_THRESHOLD, __TCA_FQ_CODEL_MAX }; @@ -730,6 +733,7 @@ struct tc_fq_codel_qd_stats { */ __u32 new_flows_len; /* count of flows in new list */ __u32 old_flows_len; /* count of flows in old list */ + __u32 ce_mark; /* packets above ce_threshold */ }; struct tc_fq_codel_cl_stats { diff --git a/net/sched/sch_codel.c b/net/sched/sch_codel.c index de28f8e968e8..1474b6560fac 100644 --- a/net/sched/sch_codel.c +++ b/net/sched/sch_codel.c @@ -6,7 +6,7 @@ * * Implemented on linux by : * Copyright (C) 2012 Michael D. 
Taht - * Copyright (C) 2012 Eric Dumazet + * Copyright (C) 2012,2015 Eric Dumazet * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -109,6 +109,7 @@ static const struct nla_policy codel_policy[TCA_CODEL_MAX + 1] = { [TCA_CODEL_LIMIT] = { .type = NLA_U32 }, [TCA_CODEL_INTERVAL] = { .type = NLA_U32 }, [TCA_CODEL_ECN] = { .type = NLA_U32 }, + [TCA_CODEL_CE_THRESHOLD]= { .type = NLA_U32 }, }; static int codel_change(struct Qdisc *sch, struct nlattr *opt) @@ -133,6 +134,12 @@ static int codel_change(struct Qdisc *sch, struct nlattr *opt) q->params.target = ((u64)target * NSEC_PER_USEC) >> CODEL_SHIFT; } + if (tb[TCA_CODEL_CE_THRESHOLD]) { + u64 val = nla_get_u32(tb[TCA_CODEL_CE_THRESHOLD]); + + q->params.ce_threshold = (val * NSEC_PER_USEC) >> CODEL_SHIFT; + } + if (tb[TCA_CODEL_INTERVAL]) { u32 interval = nla_get_u32(tb[TCA_CODEL_INTERVAL]); @@ -201,7 +208,10 @@ static int codel_dump(struct Qdisc *sch, struct sk_buff *skb) nla_put_u32(skb, TCA_CODEL_ECN, q->params.ecn)) goto nla_put_failure; - + if (q->params.ce_threshold != CODEL_DISABLED_THRESHOLD && + nla_put_u32(skb, TCA_CODEL_CE_THRESHOLD, + codel_time_to_us(q->params.ce_threshold))) + goto nla_put_failure; return nla_nest_end(skb, opts); nla_put_failure: @@ -220,6 +230,7 @@ static int codel_dump_stats(struct Qdisc *sch, struct gnet_dump *d) .ldelay = codel_time_to_us(q->vars.ldelay), .dropping = q->vars.dropping, .ecn_mark = q->stats.ecn_mark, + .ce_mark = q->stats.ce_mark, }; if (q->vars.dropping) { diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c index a6fc53d69513..778739786b32 100644 --- a/net/sched/sch_fq_codel.c +++ b/net/sched/sch_fq_codel.c @@ -6,7 +6,7 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. 
* - * Copyright (C) 2012 Eric Dumazet + * Copyright (C) 2012,2015 Eric Dumazet */ #include @@ -292,6 +292,7 @@ static const struct nla_policy fq_codel_policy[TCA_FQ_CODEL_MAX + 1] = { [TCA_FQ_CODEL_ECN] = { .type = NLA_U32 }, [TCA_FQ_CODEL_FLOWS] = { .type = NLA_U32 }, [TCA_FQ_CODEL_QUANTUM] = { .type = NLA_U32 }, + [TCA_FQ_CODEL_CE_THRESHOLD] = { .type = NLA_U32 }, }; static int fq_codel_change(struct Qdisc *sch, struct nlattr *opt) @@ -322,6 +323,12 @@ static int fq_codel_change(struct Qdisc *sch, struct nlattr *opt) q->cparams.target = (target * NSEC_PER_USEC) >> CODEL_SHIFT; } + if (tb[TCA_FQ_CODEL_CE_THRESHOLD]) { + u64 val = nla_get_u32(tb[TCA_FQ_CODEL_CE_THRESHOLD]); + + q->cparams.ce_threshold = (val * NSEC_PER_USEC) >> CODEL_SHIFT; + } + if (tb[TCA_FQ_CODEL_INTERVAL]) { u64 interval = nla_get_u32(tb[TCA_FQ_CODEL_INTERVAL]); @@ -441,6 +448,11 @@ static int fq_codel_dump(struct Qdisc *sch, struct sk_buff *skb) q->flows_cnt)) goto nla_put_failure; + if (q->cparams.ce_threshold != CODEL_DISABLED_THRESHOLD && + nla_put_u32(skb, TCA_FQ_CODEL_CE_THRESHOLD, + codel_time_to_us(q->cparams.ce_threshold))) + goto nla_put_failure; + return nla_nest_end(skb, opts); nla_put_failure: @@ -459,6 +471,7 @@ static int fq_codel_dump_stats(struct Qdisc *sch, struct gnet_dump *d) st.qdisc_stats.drop_overlimit = q->drop_overlimit; st.qdisc_stats.ecn_mark = q->cstats.ecn_mark; st.qdisc_stats.new_flow_count = q->new_flow_count; + st.qdisc_stats.ce_mark = q->cstats.ce_mark; list_for_each(pos, &q->new_flows) st.qdisc_stats.new_flows_len++; -- cgit v1.2.3 From 171a42c38c6e1a5a076d6276e94e55a0b5b7868c Mon Sep 17 00:00:00 2001 From: Andy Gospodarek Date: Sat, 9 May 2015 00:01:58 -0700 Subject: bonding: add netlink support for sys prio, actor sys mac, and port key Adds netlink support for the following bonding options: * BOND_OPT_AD_ACTOR_SYS_PRIO * BOND_OPT_AD_ACTOR_SYSTEM * BOND_OPT_AD_USER_PORT_KEY When setting the actor system mac address we assume the netlink message contains a binary mac and not a string representation of a mac. Signed-off-by: Andy Gospodarek [jt: completed the setting side of the netlink attributes] Signed-off-by: Jonathan Toppins Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. 
Miller --- drivers/net/bonding/bond_netlink.c | 50 ++++++++++++++++++++++++++++++++++++++ drivers/net/bonding/bond_options.c | 30 ++++++++++++++++------- include/uapi/linux/if_link.h | 3 +++ 3 files changed, 74 insertions(+), 9 deletions(-) (limited to 'include/uapi/linux') diff --git a/drivers/net/bonding/bond_netlink.c b/drivers/net/bonding/bond_netlink.c index 7b1124366011..f7015eb4f8db 100644 --- a/drivers/net/bonding/bond_netlink.c +++ b/drivers/net/bonding/bond_netlink.c @@ -94,6 +94,10 @@ static const struct nla_policy bond_policy[IFLA_BOND_MAX + 1] = { [IFLA_BOND_AD_LACP_RATE] = { .type = NLA_U8 }, [IFLA_BOND_AD_SELECT] = { .type = NLA_U8 }, [IFLA_BOND_AD_INFO] = { .type = NLA_NESTED }, + [IFLA_BOND_AD_ACTOR_SYS_PRIO] = { .type = NLA_U16 }, + [IFLA_BOND_AD_USER_PORT_KEY] = { .type = NLA_U16 }, + [IFLA_BOND_AD_ACTOR_SYSTEM] = { .type = NLA_BINARY, + .len = ETH_ALEN }, }; static const struct nla_policy bond_slave_policy[IFLA_BOND_SLAVE_MAX + 1] = { @@ -379,6 +383,36 @@ static int bond_changelink(struct net_device *bond_dev, if (err) return err; } + if (data[IFLA_BOND_AD_ACTOR_SYS_PRIO]) { + int actor_sys_prio = + nla_get_u16(data[IFLA_BOND_AD_ACTOR_SYS_PRIO]); + + bond_opt_initval(&newval, actor_sys_prio); + err = __bond_opt_set(bond, BOND_OPT_AD_ACTOR_SYS_PRIO, &newval); + if (err) + return err; + } + + if (data[IFLA_BOND_AD_USER_PORT_KEY]) { + int port_key = + nla_get_u16(data[IFLA_BOND_AD_USER_PORT_KEY]); + + bond_opt_initval(&newval, port_key); + err = __bond_opt_set(bond, BOND_OPT_AD_USER_PORT_KEY, &newval); + if (err) + return err; + } + + if (data[IFLA_BOND_AD_ACTOR_SYSTEM]) { + if (nla_len(data[IFLA_BOND_AD_ACTOR_SYSTEM]) != ETH_ALEN) + return -EINVAL; + + bond_opt_initval(&newval, + nla_get_be64(data[IFLA_BOND_AD_ACTOR_SYSTEM])); + err = __bond_opt_set(bond, BOND_OPT_AD_ACTOR_SYSTEM, &newval); + if (err) + return err; + } return 0; } @@ -426,6 +460,9 @@ static size_t bond_get_size(const struct net_device *bond_dev) nla_total_size(sizeof(u16)) + /* IFLA_BOND_AD_INFO_ACTOR_KEY */ nla_total_size(sizeof(u16)) + /* IFLA_BOND_AD_INFO_PARTNER_KEY*/ nla_total_size(ETH_ALEN) + /* IFLA_BOND_AD_INFO_PARTNER_MAC*/ + nla_total_size(sizeof(u16)) + /* IFLA_BOND_AD_ACTOR_SYS_PRIO */ + nla_total_size(sizeof(u16)) + /* IFLA_BOND_AD_USER_PORT_KEY */ + nla_total_size(ETH_ALEN) + /* IFLA_BOND_AD_ACTOR_SYSTEM */ 0; } @@ -551,6 +588,19 @@ static int bond_fill_info(struct sk_buff *skb, if (BOND_MODE(bond) == BOND_MODE_8023AD) { struct ad_info info; + if (nla_put_u16(skb, IFLA_BOND_AD_ACTOR_SYS_PRIO, + bond->params.ad_actor_sys_prio)) + goto nla_put_failure; + + if (nla_put_u16(skb, IFLA_BOND_AD_USER_PORT_KEY, + bond->params.ad_user_port_key)) + goto nla_put_failure; + + if (nla_put(skb, IFLA_BOND_AD_ACTOR_SYSTEM, + sizeof(bond->params.ad_actor_system), + &bond->params.ad_actor_system)) + goto nla_put_failure; + if (!bond_3ad_get_active_agg_info(bond, &info)) { struct nlattr *nest; diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c index c85da05721e6..9a32bbd7724e 100644 --- a/drivers/net/bonding/bond_options.c +++ b/drivers/net/bonding/bond_options.c @@ -1394,7 +1394,7 @@ static int bond_option_tlb_dynamic_lb_set(struct bonding *bond, static int bond_option_ad_actor_sys_prio_set(struct bonding *bond, const struct bond_opt_value *newval) { - netdev_info(bond->dev, "Setting ad_actor_sys_prio to (%llu)\n", + netdev_info(bond->dev, "Setting ad_actor_sys_prio to %llu\n", newval->value); bond->params.ad_actor_sys_prio = newval->value; @@ -1405,24 +1405,36 @@ static int 
bond_option_ad_actor_system_set(struct bonding *bond, const struct bond_opt_value *newval) { u8 macaddr[ETH_ALEN]; + u8 *mac; int i; - i = sscanf(newval->string, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx", - &macaddr[0], &macaddr[1], &macaddr[2], - &macaddr[3], &macaddr[4], &macaddr[5]); - if (i != ETH_ALEN || !is_valid_ether_addr(macaddr)) { - netdev_err(bond->dev, "Invalid MAC address.\n"); - return -EINVAL; + if (newval->string) { + i = sscanf(newval->string, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx", + &macaddr[0], &macaddr[1], &macaddr[2], + &macaddr[3], &macaddr[4], &macaddr[5]); + if (i != ETH_ALEN) + goto err; + mac = macaddr; + } else { + mac = (u8 *)&newval->value; } - ether_addr_copy(bond->params.ad_actor_system, macaddr); + if (!is_valid_ether_addr(mac)) + goto err; + + netdev_info(bond->dev, "Setting ad_actor_system to %pM\n", mac); + ether_addr_copy(bond->params.ad_actor_system, mac); return 0; + +err: + netdev_err(bond->dev, "Invalid MAC address.\n"); + return -EINVAL; } static int bond_option_ad_user_port_key_set(struct bonding *bond, const struct bond_opt_value *newval) { - netdev_info(bond->dev, "Setting ad_user_port_key to (%llu)\n", + netdev_info(bond->dev, "Setting ad_user_port_key to %llu\n", newval->value); bond->params.ad_user_port_key = newval->value; diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index d9cd19214b98..6d6e502e1051 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -417,6 +417,9 @@ enum { IFLA_BOND_AD_LACP_RATE, IFLA_BOND_AD_SELECT, IFLA_BOND_AD_INFO, + IFLA_BOND_AD_ACTOR_SYS_PRIO, + IFLA_BOND_AD_USER_PORT_KEY, + IFLA_BOND_AD_ACTOR_SYSTEM, __IFLA_BOND_MAX, }; -- cgit v1.2.3 From dc199241624a2fd85d9b0d8303babd60feadd0e6 Mon Sep 17 00:00:00 2001 From: Peter Seiderer Date: Mon, 4 May 2015 07:51:05 -0300 Subject: [media] videodev2: Add V4L2_BUF_FLAG_LAST This v4l2_buffer flag can be used by drivers to mark a capture buffer as the last generated buffer, for example after a V4L2_DEC_CMD_STOP command was issued. 
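A minimal userspace sketch (not part of the patch) of how a decoder client might use the new flag: issue V4L2_DEC_CMD_STOP, then keep dequeuing capture buffers until one carries V4L2_BUF_FLAG_LAST. Single-planar capture and MMAP buffers are assumed for brevity.

#include <string.h>
#include <sys/ioctl.h>
#include <linux/videodev2.h>

/* Drain the capture queue after a stop command; returns 0 once the
 * driver has flagged the last buffer, -1 on ioctl failure. */
static int drain_after_stop(int fd)
{
	struct v4l2_decoder_cmd cmd;
	struct v4l2_buffer buf;

	memset(&cmd, 0, sizeof(cmd));
	cmd.cmd = V4L2_DEC_CMD_STOP;
	if (ioctl(fd, VIDIOC_DECODER_CMD, &cmd) < 0)
		return -1;

	for (;;) {
		memset(&buf, 0, sizeof(buf));
		buf.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;	/* assumed single-planar */
		buf.memory = V4L2_MEMORY_MMAP;
		if (ioctl(fd, VIDIOC_DQBUF, &buf) < 0)
			return -1;
		/* ...consume the decoded frame in buf.index... */
		if (buf.flags & V4L2_BUF_FLAG_LAST)
			break;	/* nothing further will be produced */
	}
	return 0;
}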
Signed-off-by: Peter Seiderer Signed-off-by: Philipp Zabel Acked-by: Hans Verkuil Signed-off-by: Kamil Debski Signed-off-by: Mauro Carvalho Chehab --- include/trace/events/v4l2.h | 3 ++- include/uapi/linux/videodev2.h | 2 ++ 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/trace/events/v4l2.h b/include/trace/events/v4l2.h index 20112170ff11..89d0497c058a 100644 --- a/include/trace/events/v4l2.h +++ b/include/trace/events/v4l2.h @@ -83,7 +83,8 @@ SHOW_FIELD { V4L2_BUF_FLAG_TIMESTAMP_MASK, "TIMESTAMP_MASK" }, \ { V4L2_BUF_FLAG_TIMESTAMP_UNKNOWN, "TIMESTAMP_UNKNOWN" }, \ { V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC, "TIMESTAMP_MONOTONIC" }, \ - { V4L2_BUF_FLAG_TIMESTAMP_COPY, "TIMESTAMP_COPY" }) + { V4L2_BUF_FLAG_TIMESTAMP_COPY, "TIMESTAMP_COPY" }, \ + { V4L2_BUF_FLAG_LAST, "LAST" }) #define show_timecode_flags(flags) \ __print_flags(flags, "|", \ diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index fa376f7666ba..0f5a4673f3e4 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -810,6 +810,8 @@ struct v4l2_buffer { #define V4L2_BUF_FLAG_TSTAMP_SRC_MASK 0x00070000 #define V4L2_BUF_FLAG_TSTAMP_SRC_EOF 0x00000000 #define V4L2_BUF_FLAG_TSTAMP_SRC_SOE 0x00010000 +/* mem2mem encoder/decoder */ +#define V4L2_BUF_FLAG_LAST 0x00100000 /** * struct v4l2_exportbuffer - export of video buffer as DMABUF file descriptor -- cgit v1.2.3 From 68cbbc3a9d1fc231810b2490bca73b3b444ef542 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Thu, 26 Mar 2015 16:42:09 +1100 Subject: drivers/vfio: Support EEH error injection The patch adds one more EEH sub-command (VFIO_EEH_PE_INJECT_ERR) to inject the specified EEH error, which is represented by (struct vfio_eeh_pe_err), to the indicated PE for testing purpose. Signed-off-by: Gavin Shan Reviewed-by: David Gibson Acked-by: Alex Williamson Signed-off-by: Michael Ellerman --- Documentation/vfio.txt | 12 ++++++++++++ drivers/vfio/vfio_spapr_eeh.c | 10 ++++++++++ include/uapi/linux/vfio.h | 14 +++++++++++++- 3 files changed, 35 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/Documentation/vfio.txt b/Documentation/vfio.txt index 96978eced341..4c746a7e717a 100644 --- a/Documentation/vfio.txt +++ b/Documentation/vfio.txt @@ -385,6 +385,18 @@ The code flow from the example above should be slightly changed: .... + /* Inject EEH error, which is expected to be caused by 32-bits + * config load. + */ + pe_op.op = VFIO_EEH_PE_INJECT_ERR; + pe_op.err.type = EEH_ERR_TYPE_32; + pe_op.err.func = EEH_ERR_FUNC_LD_CFG_ADDR; + pe_op.err.addr = 0ul; + pe_op.err.mask = 0ul; + ioctl(container, VFIO_EEH_PE_OP, &pe_op); + + .... + /* When 0xFF's returned from reading PCI config space or IO BARs * of the PCI device. Check the PE's state to see if that has been * frozen. 
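Illustrative only, expanding on the documentation snippet above: a userspace caller fills the whole vfio_eeh_pe_op, sizing argsz so it covers the new err union (the kernel hunk below rejects anything smaller). The EEH_ERR_* constants are the ones from the example and stand in for whatever error the test wants to inject.

struct vfio_eeh_pe_op pe_op;

memset(&pe_op, 0, sizeof(pe_op));
pe_op.argsz    = sizeof(pe_op);		/* must reach err.mask */
pe_op.op       = VFIO_EEH_PE_INJECT_ERR;
pe_op.err.type = EEH_ERR_TYPE_32;
pe_op.err.func = EEH_ERR_FUNC_LD_CFG_ADDR;
pe_op.err.addr = 0;
pe_op.err.mask = 0;

if (ioctl(container, VFIO_EEH_PE_OP, &pe_op) < 0)
	perror("VFIO_EEH_PE_INJECT_ERR");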
diff --git a/drivers/vfio/vfio_spapr_eeh.c b/drivers/vfio/vfio_spapr_eeh.c index 5fa42db769ee..38edeb4729a9 100644 --- a/drivers/vfio/vfio_spapr_eeh.c +++ b/drivers/vfio/vfio_spapr_eeh.c @@ -85,6 +85,16 @@ long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group, case VFIO_EEH_PE_CONFIGURE: ret = eeh_pe_configure(pe); break; + case VFIO_EEH_PE_INJECT_ERR: + minsz = offsetofend(struct vfio_eeh_pe_op, err.mask); + if (op.argsz < minsz) + return -EINVAL; + if (copy_from_user(&op, (void __user *)arg, minsz)) + return -EFAULT; + + ret = eeh_pe_inject_err(pe, op.err.type, op.err.func, + op.err.addr, op.err.mask); + break; default: ret = -EINVAL; } diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index b57b750c222f..e4fa1995f613 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -470,12 +470,23 @@ struct vfio_iommu_spapr_tce_info { * - unfreeze IO/DMA for frozen PE; * - read PE state; * - reset PE; - * - configure PE. + * - configure PE; + * - inject EEH error. */ +struct vfio_eeh_pe_err { + __u32 type; + __u32 func; + __u64 addr; + __u64 mask; +}; + struct vfio_eeh_pe_op { __u32 argsz; __u32 flags; __u32 op; + union { + struct vfio_eeh_pe_err err; + }; }; #define VFIO_EEH_PE_DISABLE 0 /* Disable EEH functionality */ @@ -492,6 +503,7 @@ struct vfio_eeh_pe_op { #define VFIO_EEH_PE_RESET_HOT 6 /* Assert hot reset */ #define VFIO_EEH_PE_RESET_FUNDAMENTAL 7 /* Assert fundamental reset */ #define VFIO_EEH_PE_CONFIGURE 8 /* PE configuration */ +#define VFIO_EEH_PE_INJECT_ERR 9 /* Inject EEH error */ #define VFIO_EEH_PE_OP _IO(VFIO_TYPE, VFIO_BASE + 21) -- cgit v1.2.3 From a3eb95f891d6130b1fc03dd07a8b54cf0a5c8ab8 Mon Sep 17 00:00:00 2001 From: David Ward Date: Sat, 9 May 2015 22:01:46 -0400 Subject: net_sched: gred: add TCA_GRED_LIMIT attribute In a GRED qdisc, if the default "virtual queue" (VQ) does not have drop parameters configured, then packets for the default VQ are not subjected to RED and are only dropped if the queue is larger than the net_device's tx_queue_len. This behavior is useful for WRED mode, since these packets will still influence the calculated average queue length and (therefore) the drop probability for all of the other VQs. However, for some drivers tx_queue_len is zero. In other cases the user may wish to make the limit the same for all VQs (including the default VQ with no drop parameters). This change adds a TCA_GRED_LIMIT attribute to set the GRED queue limit, in bytes, during qdisc setup. (This limit is in bytes to be consistent with the drop parameters.) The default limit is the same as for a bfifo queue (tx_queue_len * psched_mtu). If the drop parameters of any VQ are configured with a smaller limit than the GRED queue limit, that VQ will still observe the smaller limit instead. Signed-off-by: David Ward Signed-off-by: David S. 
Miller --- include/uapi/linux/pkt_sched.h | 3 ++- net/sched/sch_gred.c | 28 ++++++++++++++++++++++++---- 2 files changed, 26 insertions(+), 5 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h index 69d88b309cc7..8d2530daca9f 100644 --- a/include/uapi/linux/pkt_sched.h +++ b/include/uapi/linux/pkt_sched.h @@ -268,7 +268,8 @@ enum { TCA_GRED_STAB, TCA_GRED_DPS, TCA_GRED_MAX_P, - __TCA_GRED_MAX, + TCA_GRED_LIMIT, + __TCA_GRED_MAX, }; #define TCA_GRED_MAX (__TCA_GRED_MAX - 1) diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c index a4ca4517cdc8..826e2994152b 100644 --- a/net/sched/sch_gred.c +++ b/net/sched/sch_gred.c @@ -165,7 +165,8 @@ static int gred_enqueue(struct sk_buff *skb, struct Qdisc *sch) * if no default DP has been configured. This * allows for DP flows to be left untouched. */ - if (skb_queue_len(&sch->q) < qdisc_dev(sch)->tx_queue_len) + if (likely(sch->qstats.backlog + qdisc_pkt_len(skb) <= + sch->limit)) return qdisc_enqueue_tail(skb, sch); else goto drop; @@ -397,7 +398,10 @@ static inline int gred_change_vq(struct Qdisc *sch, int dp, q->DP = dp; q->prio = prio; - q->limit = ctl->limit; + if (ctl->limit > sch->limit) + q->limit = sch->limit; + else + q->limit = ctl->limit; if (q->backlog == 0) red_end_of_idle_period(&q->vars); @@ -414,6 +418,7 @@ static const struct nla_policy gred_policy[TCA_GRED_MAX + 1] = { [TCA_GRED_STAB] = { .len = 256 }, [TCA_GRED_DPS] = { .len = sizeof(struct tc_gred_sopt) }, [TCA_GRED_MAX_P] = { .type = NLA_U32 }, + [TCA_GRED_LIMIT] = { .type = NLA_U32 }, }; static int gred_change(struct Qdisc *sch, struct nlattr *opt) @@ -433,11 +438,15 @@ static int gred_change(struct Qdisc *sch, struct nlattr *opt) if (err < 0) return err; - if (tb[TCA_GRED_PARMS] == NULL && tb[TCA_GRED_STAB] == NULL) + if (tb[TCA_GRED_PARMS] == NULL && tb[TCA_GRED_STAB] == NULL) { + if (tb[TCA_GRED_LIMIT] != NULL) + sch->limit = nla_get_u32(tb[TCA_GRED_LIMIT]); return gred_change_table_def(sch, opt); + } if (tb[TCA_GRED_PARMS] == NULL || - tb[TCA_GRED_STAB] == NULL) + tb[TCA_GRED_STAB] == NULL || + tb[TCA_GRED_LIMIT] != NULL) return -EINVAL; max_P = tb[TCA_GRED_MAX_P] ? nla_get_u32(tb[TCA_GRED_MAX_P]) : 0; @@ -501,6 +510,14 @@ static int gred_init(struct Qdisc *sch, struct nlattr *opt) if (tb[TCA_GRED_PARMS] || tb[TCA_GRED_STAB]) return -EINVAL; + if (tb[TCA_GRED_LIMIT]) + sch->limit = nla_get_u32(tb[TCA_GRED_LIMIT]); + else { + u32 qlen = qdisc_dev(sch)->tx_queue_len ? : 1; + + sch->limit = qlen * psched_mtu(qdisc_dev(sch)); + } + return gred_change_table_def(sch, tb[TCA_GRED_DPS]); } @@ -531,6 +548,9 @@ static int gred_dump(struct Qdisc *sch, struct sk_buff *skb) if (nla_put(skb, TCA_GRED_MAX_P, sizeof(max_p), max_p)) goto nla_put_failure; + if (nla_put_u32(skb, TCA_GRED_LIMIT, sch->limit)) + goto nla_put_failure; + parms = nla_nest_start(skb, TCA_GRED_PARMS); if (parms == NULL) goto nla_put_failure; -- cgit v1.2.3 From e578d9c02587d57bfa7b560767c698a668a468c6 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 11 May 2015 19:50:41 +0200 Subject: net: sched: use counter to break reclassify loops Seems all we want here is to avoid endless 'goto reclassify' loop. tc_classify_compat even resets this counter when something other than TC_ACT_RECLASSIFY is returned, so this skb-counter doesn't break hypothetical loops induced by something other than perpetual TC_ACT_RECLASSIFY return values. skb_act_clone is now identical to skb_clone, so just use that. 
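The guard the patch settles on, restated as a compact sketch so the intent is easy to see (the real code is in the tc_classify() hunk below): a plain local counter, rather than state carried in skb->tc_verd, bounds how many times the filter chain may ask for reclassification.

	int limit = 0;

reclassify:
	err = tc_classify_compat(skb, tp, res);
	if (err == TC_ACT_RECLASSIFY) {
		tp = otp;				/* restart from the first filter */
		if (unlikely(limit++ >= MAX_REC_LOOP))
			return TC_ACT_SHOT;		/* loop detected, drop the packet */
		goto reclassify;
	}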
Tested with following (bogus) filter: tc filter add dev eth0 parent ffff: \ protocol ip u32 match u32 0 0 police rate 10Kbit burst \ 64000 mtu 1500 action reclassify Acked-by: Daniel Borkmann Signed-off-by: Florian Westphal Acked-by: Alexei Starovoitov Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- Documentation/networking/tc-actions-env-rules.txt | 4 ---- include/net/sch_generic.h | 15 --------------- include/uapi/linux/pkt_cls.h | 2 +- net/sched/act_mirred.c | 2 +- net/sched/sch_api.c | 12 +++--------- 5 files changed, 5 insertions(+), 30 deletions(-) (limited to 'include/uapi/linux') diff --git a/Documentation/networking/tc-actions-env-rules.txt b/Documentation/networking/tc-actions-env-rules.txt index 95c71716b2e2..f37814693ad3 100644 --- a/Documentation/networking/tc-actions-env-rules.txt +++ b/Documentation/networking/tc-actions-env-rules.txt @@ -8,10 +8,6 @@ For example if your action queues a packet to be processed later, or intentionally branches by redirecting a packet, then you need to clone the packet. -There are certain fields in the skb tc_verd that need to be reset so we -avoid loops, etc. A few are generic enough that skb_act_clone() -resets them for you, so invoke skb_act_clone() rather than skb_clone(). - 2) If you munge any packet thou shalt call pskb_expand_head in the case someone else is referencing the skb. After that you "own" the skb. diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 1b0a2e88ed2b..2738f6f87908 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -739,21 +739,6 @@ static inline u32 qdisc_l2t(struct qdisc_rate_table* rtab, unsigned int pktlen) return rtab->data[slot]; } -#ifdef CONFIG_NET_CLS_ACT -static inline struct sk_buff *skb_act_clone(struct sk_buff *skb, gfp_t gfp_mask, - int action) -{ - struct sk_buff *n; - - n = skb_clone(skb, gfp_mask); - - if (n) { - n->tc_verd = SET_TC_VERD(n->tc_verd, 0); - } - return n; -} -#endif - struct psched_ratecfg { u64 rate_bytes_ps; /* bytes per second */ u32 mult; diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index 596ffa0c7084..ffc112c8e1c2 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -44,13 +44,13 @@ bits 9,10,11: redirect counter - redirect TTL. 
Loop avoidance #define TC_OK2MUNGE _TC_MAKEMASK1(1) #define SET_TC_OK2MUNGE(v) ( TC_OK2MUNGE | (v & ~TC_OK2MUNGE)) #define CLR_TC_OK2MUNGE(v) ( v & ~TC_OK2MUNGE) -#endif #define S_TC_VERD _TC_MAKE32(2) #define M_TC_VERD _TC_MAKEMASK(4,S_TC_VERD) #define G_TC_VERD(x) _TC_GETVALUE(x,S_TC_VERD,M_TC_VERD) #define V_TC_VERD(x) _TC_MAKEVALUE(x,S_TC_VERD) #define SET_TC_VERD(v,n) ((V_TC_VERD(n)) | (v & ~M_TC_VERD)) +#endif #define S_TC_FROM _TC_MAKE32(6) #define M_TC_FROM _TC_MAKEMASK(2,S_TC_FROM) diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 3f63ceac8e01..a42a3b257226 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -151,7 +151,7 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a, } at = G_TC_AT(skb->tc_verd); - skb2 = skb_act_clone(skb, GFP_ATOMIC, m->tcf_action); + skb2 = skb_clone(skb, GFP_ATOMIC); if (skb2 == NULL) goto out; diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index ad9eed70bc8f..0b74dc0ede9c 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1816,13 +1816,8 @@ int tc_classify_compat(struct sk_buff *skb, const struct tcf_proto *tp, continue; err = tp->classify(skb, tp, res); - if (err >= 0) { -#ifdef CONFIG_NET_CLS_ACT - if (err != TC_ACT_RECLASSIFY && skb->tc_verd) - skb->tc_verd = SET_TC_VERD(skb->tc_verd, 0); -#endif + if (err >= 0) return err; - } } return -1; } @@ -1834,23 +1829,22 @@ int tc_classify(struct sk_buff *skb, const struct tcf_proto *tp, int err = 0; #ifdef CONFIG_NET_CLS_ACT const struct tcf_proto *otp = tp; + int limit = 0; reclassify: #endif err = tc_classify_compat(skb, tp, res); #ifdef CONFIG_NET_CLS_ACT if (err == TC_ACT_RECLASSIFY) { - u32 verd = G_TC_VERD(skb->tc_verd); tp = otp; - if (verd++ >= MAX_REC_LOOP) { + if (unlikely(limit++ >= MAX_REC_LOOP)) { net_notice_ratelimited("%s: packet reclassify loop rule prio %u protocol %02x\n", tp->q->ops->id, tp->prio & 0xffff, ntohs(tp->protocol)); return TC_ACT_SHOT; } - skb->tc_verd = SET_TC_VERD(skb->tc_verd, verd); goto reclassify; } #endif -- cgit v1.2.3 From 77b9900ef53ae047e36a37d13a2aa33bb2d60641 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 12 May 2015 14:56:21 +0200 Subject: tc: introduce Flower classifier This patch introduces a flow-based filter. So far, the very essential packet fields are supported. This patch is only the first step. There is a lot of potential performance improvements possible to implement. Also a lot of features are missing now. They will be addressed in follow-up patches. Signed-off-by: Jiri Pirko Acked-by: Jamal Hadi Salim Signed-off-by: David S. 
Miller --- include/uapi/linux/pkt_cls.h | 30 ++ net/sched/Kconfig | 10 + net/sched/Makefile | 1 + net/sched/cls_flower.c | 688 +++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 729 insertions(+) create mode 100644 net/sched/cls_flower.c (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index ffc112c8e1c2..39fb53d67b11 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -409,6 +409,36 @@ enum { #define TCA_BPF_MAX (__TCA_BPF_MAX - 1) +/* Flower classifier */ + +enum { + TCA_FLOWER_UNSPEC, + TCA_FLOWER_CLASSID, + TCA_FLOWER_INDEV, + TCA_FLOWER_ACT, + TCA_FLOWER_KEY_ETH_DST, /* ETH_ALEN */ + TCA_FLOWER_KEY_ETH_DST_MASK, /* ETH_ALEN */ + TCA_FLOWER_KEY_ETH_SRC, /* ETH_ALEN */ + TCA_FLOWER_KEY_ETH_SRC_MASK, /* ETH_ALEN */ + TCA_FLOWER_KEY_ETH_TYPE, /* be16 */ + TCA_FLOWER_KEY_IP_PROTO, /* u8 */ + TCA_FLOWER_KEY_IPV4_SRC, /* be32 */ + TCA_FLOWER_KEY_IPV4_SRC_MASK, /* be32 */ + TCA_FLOWER_KEY_IPV4_DST, /* be32 */ + TCA_FLOWER_KEY_IPV4_DST_MASK, /* be32 */ + TCA_FLOWER_KEY_IPV6_SRC, /* struct in6_addr */ + TCA_FLOWER_KEY_IPV6_SRC_MASK, /* struct in6_addr */ + TCA_FLOWER_KEY_IPV6_DST, /* struct in6_addr */ + TCA_FLOWER_KEY_IPV6_DST_MASK, /* struct in6_addr */ + TCA_FLOWER_KEY_TCP_SRC, /* be16 */ + TCA_FLOWER_KEY_TCP_DST, /* be16 */ + TCA_FLOWER_KEY_UDP_SRC, /* be16 */ + TCA_FLOWER_KEY_UDP_DST, /* be16 */ + __TCA_FLOWER_MAX, +}; + +#define TCA_FLOWER_MAX (__TCA_FLOWER_MAX - 1) + /* Extended Matches */ struct tcf_ematch_tree_hdr { diff --git a/net/sched/Kconfig b/net/sched/Kconfig index 2274e723a3df..5fd1c2f487d2 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -477,6 +477,16 @@ config NET_CLS_BPF To compile this code as a module, choose M here: the module will be called cls_bpf. +config NET_CLS_FLOWER + tristate "Flower classifier" + select NET_CLS + ---help--- + If you say Y here, you will be able to classify packets based on + a configurable combination of packet keys and masks. + + To compile this code as a module, choose M here: the module will + be called cls_flower. + config NET_EMATCH bool "Extended Matches" select NET_CLS diff --git a/net/sched/Makefile b/net/sched/Makefile index 7ca7f4c1b8c2..690c1689e090 100644 --- a/net/sched/Makefile +++ b/net/sched/Makefile @@ -56,6 +56,7 @@ obj-$(CONFIG_NET_CLS_BASIC) += cls_basic.o obj-$(CONFIG_NET_CLS_FLOW) += cls_flow.o obj-$(CONFIG_NET_CLS_CGROUP) += cls_cgroup.o obj-$(CONFIG_NET_CLS_BPF) += cls_bpf.o +obj-$(CONFIG_NET_CLS_FLOWER) += cls_flower.o obj-$(CONFIG_NET_EMATCH) += ematch.o obj-$(CONFIG_NET_EMATCH_CMP) += em_cmp.o obj-$(CONFIG_NET_EMATCH_NBYTE) += em_nbyte.o diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c new file mode 100644 index 000000000000..9bc654c764cd --- /dev/null +++ b/net/sched/cls_flower.c @@ -0,0 +1,688 @@ +/* + * net/sched/cls_flower.c Flower classifier + * + * Copyright (c) 2015 Jiri Pirko + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ */ + +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#include + +struct fl_flow_key { + int indev_ifindex; + struct flow_dissector_key_basic basic; + struct flow_dissector_key_eth_addrs eth; + union { + struct flow_dissector_key_addrs ipv4; + struct flow_dissector_key_ipv6_addrs ipv6; + }; + struct flow_dissector_key_ports tp; +} __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */ + +struct fl_flow_mask_range { + unsigned short int start; + unsigned short int end; +}; + +struct fl_flow_mask { + struct fl_flow_key key; + struct fl_flow_mask_range range; + struct rcu_head rcu; +}; + +struct cls_fl_head { + struct rhashtable ht; + struct fl_flow_mask mask; + struct flow_dissector dissector; + u32 hgen; + bool mask_assigned; + struct list_head filters; + struct rhashtable_params ht_params; + struct rcu_head rcu; +}; + +struct cls_fl_filter { + struct rhash_head ht_node; + struct fl_flow_key mkey; + struct tcf_exts exts; + struct tcf_result res; + struct fl_flow_key key; + struct list_head list; + u32 handle; + struct rcu_head rcu; +}; + +static unsigned short int fl_mask_range(const struct fl_flow_mask *mask) +{ + return mask->range.end - mask->range.start; +} + +static void fl_mask_update_range(struct fl_flow_mask *mask) +{ + const u8 *bytes = (const u8 *) &mask->key; + size_t size = sizeof(mask->key); + size_t i, first = 0, last = size - 1; + + for (i = 0; i < sizeof(mask->key); i++) { + if (bytes[i]) { + if (!first && i) + first = i; + last = i; + } + } + mask->range.start = rounddown(first, sizeof(long)); + mask->range.end = roundup(last + 1, sizeof(long)); +} + +static void *fl_key_get_start(struct fl_flow_key *key, + const struct fl_flow_mask *mask) +{ + return (u8 *) key + mask->range.start; +} + +static void fl_set_masked_key(struct fl_flow_key *mkey, struct fl_flow_key *key, + struct fl_flow_mask *mask) +{ + const long *lkey = fl_key_get_start(key, mask); + const long *lmask = fl_key_get_start(&mask->key, mask); + long *lmkey = fl_key_get_start(mkey, mask); + int i; + + for (i = 0; i < fl_mask_range(mask); i += sizeof(long)) + *lmkey++ = *lkey++ & *lmask++; +} + +static void fl_clear_masked_range(struct fl_flow_key *key, + struct fl_flow_mask *mask) +{ + memset(fl_key_get_start(key, mask), 0, fl_mask_range(mask)); +} + +static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp, + struct tcf_result *res) +{ + struct cls_fl_head *head = rcu_dereference_bh(tp->root); + struct cls_fl_filter *f; + struct fl_flow_key skb_key; + struct fl_flow_key skb_mkey; + + fl_clear_masked_range(&skb_key, &head->mask); + skb_key.indev_ifindex = skb->skb_iif; + /* skb_flow_dissect() does not set n_proto in case an unknown protocol, + * so do it rather here. 
+ */ + skb_key.basic.n_proto = skb->protocol; + skb_flow_dissect(skb, &head->dissector, &skb_key); + + fl_set_masked_key(&skb_mkey, &skb_key, &head->mask); + + f = rhashtable_lookup_fast(&head->ht, + fl_key_get_start(&skb_mkey, &head->mask), + head->ht_params); + if (f) { + *res = f->res; + return tcf_exts_exec(skb, &f->exts, res); + } + return -1; +} + +static int fl_init(struct tcf_proto *tp) +{ + struct cls_fl_head *head; + + head = kzalloc(sizeof(*head), GFP_KERNEL); + if (!head) + return -ENOBUFS; + + INIT_LIST_HEAD_RCU(&head->filters); + rcu_assign_pointer(tp->root, head); + + return 0; +} + +static void fl_destroy_filter(struct rcu_head *head) +{ + struct cls_fl_filter *f = container_of(head, struct cls_fl_filter, rcu); + + tcf_exts_destroy(&f->exts); + kfree(f); +} + +static bool fl_destroy(struct tcf_proto *tp, bool force) +{ + struct cls_fl_head *head = rtnl_dereference(tp->root); + struct cls_fl_filter *f, *next; + + if (!force && !list_empty(&head->filters)) + return false; + + list_for_each_entry_safe(f, next, &head->filters, list) { + list_del_rcu(&f->list); + call_rcu(&f->rcu, fl_destroy_filter); + } + RCU_INIT_POINTER(tp->root, NULL); + if (head->mask_assigned) + rhashtable_destroy(&head->ht); + kfree_rcu(head, rcu); + return true; +} + +static unsigned long fl_get(struct tcf_proto *tp, u32 handle) +{ + struct cls_fl_head *head = rtnl_dereference(tp->root); + struct cls_fl_filter *f; + + list_for_each_entry(f, &head->filters, list) + if (f->handle == handle) + return (unsigned long) f; + return 0; +} + +static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = { + [TCA_FLOWER_UNSPEC] = { .type = NLA_UNSPEC }, + [TCA_FLOWER_CLASSID] = { .type = NLA_U32 }, + [TCA_FLOWER_INDEV] = { .type = NLA_STRING, + .len = IFNAMSIZ }, + [TCA_FLOWER_KEY_ETH_DST] = { .len = ETH_ALEN }, + [TCA_FLOWER_KEY_ETH_DST_MASK] = { .len = ETH_ALEN }, + [TCA_FLOWER_KEY_ETH_SRC] = { .len = ETH_ALEN }, + [TCA_FLOWER_KEY_ETH_SRC_MASK] = { .len = ETH_ALEN }, + [TCA_FLOWER_KEY_ETH_TYPE] = { .type = NLA_U16 }, + [TCA_FLOWER_KEY_IP_PROTO] = { .type = NLA_U8 }, + [TCA_FLOWER_KEY_IPV4_SRC] = { .type = NLA_U32 }, + [TCA_FLOWER_KEY_IPV4_SRC_MASK] = { .type = NLA_U32 }, + [TCA_FLOWER_KEY_IPV4_DST] = { .type = NLA_U32 }, + [TCA_FLOWER_KEY_IPV4_DST_MASK] = { .type = NLA_U32 }, + [TCA_FLOWER_KEY_IPV6_SRC] = { .len = sizeof(struct in6_addr) }, + [TCA_FLOWER_KEY_IPV6_SRC_MASK] = { .len = sizeof(struct in6_addr) }, + [TCA_FLOWER_KEY_IPV6_DST] = { .len = sizeof(struct in6_addr) }, + [TCA_FLOWER_KEY_IPV6_DST_MASK] = { .len = sizeof(struct in6_addr) }, + [TCA_FLOWER_KEY_TCP_SRC] = { .type = NLA_U16 }, + [TCA_FLOWER_KEY_TCP_DST] = { .type = NLA_U16 }, + [TCA_FLOWER_KEY_TCP_SRC] = { .type = NLA_U16 }, + [TCA_FLOWER_KEY_TCP_DST] = { .type = NLA_U16 }, +}; + +static void fl_set_key_val(struct nlattr **tb, + void *val, int val_type, + void *mask, int mask_type, int len) +{ + if (!tb[val_type]) + return; + memcpy(val, nla_data(tb[val_type]), len); + if (mask_type == TCA_FLOWER_UNSPEC || !tb[mask_type]) + memset(mask, 0xff, len); + else + memcpy(mask, nla_data(tb[mask_type]), len); +} + +static int fl_set_key(struct net *net, struct nlattr **tb, + struct fl_flow_key *key, struct fl_flow_key *mask) +{ + int err; + + if (tb[TCA_FLOWER_INDEV]) { + err = tcf_change_indev(net, tb[TCA_FLOWER_INDEV]); + if (err < 0) + return err; + key->indev_ifindex = err; + mask->indev_ifindex = 0xffffffff; + } + + fl_set_key_val(tb, key->eth.dst, TCA_FLOWER_KEY_ETH_DST, + mask->eth.dst, TCA_FLOWER_KEY_ETH_DST_MASK, + sizeof(key->eth.dst)); + 
fl_set_key_val(tb, key->eth.src, TCA_FLOWER_KEY_ETH_SRC, + mask->eth.src, TCA_FLOWER_KEY_ETH_SRC_MASK, + sizeof(key->eth.src)); + fl_set_key_val(tb, &key->basic.n_proto, TCA_FLOWER_KEY_ETH_TYPE, + &mask->basic.n_proto, TCA_FLOWER_UNSPEC, + sizeof(key->basic.n_proto)); + if (key->basic.n_proto == htons(ETH_P_IP) || + key->basic.n_proto == htons(ETH_P_IPV6)) { + fl_set_key_val(tb, &key->basic.ip_proto, TCA_FLOWER_KEY_IP_PROTO, + &mask->basic.ip_proto, TCA_FLOWER_UNSPEC, + sizeof(key->basic.ip_proto)); + } + if (key->basic.n_proto == htons(ETH_P_IP)) { + fl_set_key_val(tb, &key->ipv4.src, TCA_FLOWER_KEY_IPV4_SRC, + &mask->ipv4.src, TCA_FLOWER_KEY_IPV4_SRC_MASK, + sizeof(key->ipv4.src)); + fl_set_key_val(tb, &key->ipv4.dst, TCA_FLOWER_KEY_IPV4_DST, + &mask->ipv4.dst, TCA_FLOWER_KEY_IPV4_DST_MASK, + sizeof(key->ipv4.dst)); + } else if (key->basic.n_proto == htons(ETH_P_IPV6)) { + fl_set_key_val(tb, &key->ipv6.src, TCA_FLOWER_KEY_IPV6_SRC, + &mask->ipv6.src, TCA_FLOWER_KEY_IPV6_SRC_MASK, + sizeof(key->ipv6.src)); + fl_set_key_val(tb, &key->ipv6.dst, TCA_FLOWER_KEY_IPV6_DST, + &mask->ipv6.dst, TCA_FLOWER_KEY_IPV6_DST_MASK, + sizeof(key->ipv6.dst)); + } + if (key->basic.ip_proto == IPPROTO_TCP) { + fl_set_key_val(tb, &key->tp.src, TCA_FLOWER_KEY_TCP_SRC, + &mask->tp.src, TCA_FLOWER_UNSPEC, + sizeof(key->tp.src)); + fl_set_key_val(tb, &key->tp.dst, TCA_FLOWER_KEY_TCP_DST, + &mask->tp.dst, TCA_FLOWER_UNSPEC, + sizeof(key->tp.dst)); + } else if (key->basic.ip_proto == IPPROTO_UDP) { + fl_set_key_val(tb, &key->tp.src, TCA_FLOWER_KEY_UDP_SRC, + &mask->tp.src, TCA_FLOWER_UNSPEC, + sizeof(key->tp.src)); + fl_set_key_val(tb, &key->tp.dst, TCA_FLOWER_KEY_UDP_DST, + &mask->tp.dst, TCA_FLOWER_UNSPEC, + sizeof(key->tp.dst)); + } + + return 0; +} + +static bool fl_mask_eq(struct fl_flow_mask *mask1, + struct fl_flow_mask *mask2) +{ + const long *lmask1 = fl_key_get_start(&mask1->key, mask1); + const long *lmask2 = fl_key_get_start(&mask2->key, mask2); + + return !memcmp(&mask1->range, &mask2->range, sizeof(mask1->range)) && + !memcmp(lmask1, lmask2, fl_mask_range(mask1)); +} + +static const struct rhashtable_params fl_ht_params = { + .key_offset = offsetof(struct cls_fl_filter, mkey), /* base offset */ + .head_offset = offsetof(struct cls_fl_filter, ht_node), + .automatic_shrinking = true, +}; + +static int fl_init_hashtable(struct cls_fl_head *head, + struct fl_flow_mask *mask) +{ + head->ht_params = fl_ht_params; + head->ht_params.key_len = fl_mask_range(mask); + head->ht_params.key_offset += mask->range.start; + + return rhashtable_init(&head->ht, &head->ht_params); +} + +#define FL_KEY_MEMBER_OFFSET(member) offsetof(struct fl_flow_key, member) +#define FL_KEY_MEMBER_SIZE(member) (sizeof(((struct fl_flow_key *) 0)->member)) +#define FL_KEY_MEMBER_END_OFFSET(member) \ + (FL_KEY_MEMBER_OFFSET(member) + FL_KEY_MEMBER_SIZE(member)) + +#define FL_KEY_IN_RANGE(mask, member) \ + (FL_KEY_MEMBER_OFFSET(member) <= (mask)->range.end && \ + FL_KEY_MEMBER_END_OFFSET(member) >= (mask)->range.start) + +#define FL_KEY_SET(keys, cnt, id, member) \ + do { \ + keys[cnt].key_id = id; \ + keys[cnt].offset = FL_KEY_MEMBER_OFFSET(member); \ + cnt++; \ + } while(0); + +#define FL_KEY_SET_IF_IN_RANGE(mask, keys, cnt, id, member) \ + do { \ + if (FL_KEY_IN_RANGE(mask, member)) \ + FL_KEY_SET(keys, cnt, id, member); \ + } while(0); + +static void fl_init_dissector(struct cls_fl_head *head, + struct fl_flow_mask *mask) +{ + struct flow_dissector_key keys[FLOW_DISSECTOR_KEY_MAX]; + size_t cnt = 0; + + FL_KEY_SET(keys, cnt, 
FLOW_DISSECTOR_KEY_BASIC, basic); + FL_KEY_SET_IF_IN_RANGE(mask, keys, cnt, + FLOW_DISSECTOR_KEY_ETH_ADDRS, eth); + FL_KEY_SET_IF_IN_RANGE(mask, keys, cnt, + FLOW_DISSECTOR_KEY_IPV4_ADDRS, ipv4); + FL_KEY_SET_IF_IN_RANGE(mask, keys, cnt, + FLOW_DISSECTOR_KEY_IPV6_ADDRS, ipv6); + FL_KEY_SET_IF_IN_RANGE(mask, keys, cnt, + FLOW_DISSECTOR_KEY_PORTS, tp); + + skb_flow_dissector_init(&head->dissector, keys, cnt); +} + +static int fl_check_assign_mask(struct cls_fl_head *head, + struct fl_flow_mask *mask) +{ + int err; + + if (head->mask_assigned) { + if (!fl_mask_eq(&head->mask, mask)) + return -EINVAL; + else + return 0; + } + + /* Mask is not assigned yet. So assign it and init hashtable + * according to that. + */ + err = fl_init_hashtable(head, mask); + if (err) + return err; + memcpy(&head->mask, mask, sizeof(head->mask)); + head->mask_assigned = true; + + fl_init_dissector(head, mask); + + return 0; +} + +static int fl_set_parms(struct net *net, struct tcf_proto *tp, + struct cls_fl_filter *f, struct fl_flow_mask *mask, + unsigned long base, struct nlattr **tb, + struct nlattr *est, bool ovr) +{ + struct tcf_exts e; + int err; + + tcf_exts_init(&e, TCA_FLOWER_ACT, 0); + err = tcf_exts_validate(net, tp, tb, est, &e, ovr); + if (err < 0) + return err; + + if (tb[TCA_FLOWER_CLASSID]) { + f->res.classid = nla_get_u32(tb[TCA_FLOWER_CLASSID]); + tcf_bind_filter(tp, &f->res, base); + } + + err = fl_set_key(net, tb, &f->key, &mask->key); + if (err) + goto errout; + + fl_mask_update_range(mask); + fl_set_masked_key(&f->mkey, &f->key, mask); + + tcf_exts_change(tp, &f->exts, &e); + + return 0; +errout: + tcf_exts_destroy(&e); + return err; +} + +static u32 fl_grab_new_handle(struct tcf_proto *tp, + struct cls_fl_head *head) +{ + unsigned int i = 0x80000000; + u32 handle; + + do { + if (++head->hgen == 0x7FFFFFFF) + head->hgen = 1; + } while (--i > 0 && fl_get(tp, head->hgen)); + + if (unlikely(i == 0)) { + pr_err("Insufficient number of handles\n"); + handle = 0; + } else { + handle = head->hgen; + } + + return handle; +} + +static int fl_change(struct net *net, struct sk_buff *in_skb, + struct tcf_proto *tp, unsigned long base, + u32 handle, struct nlattr **tca, + unsigned long *arg, bool ovr) +{ + struct cls_fl_head *head = rtnl_dereference(tp->root); + struct cls_fl_filter *fold = (struct cls_fl_filter *) *arg; + struct cls_fl_filter *fnew; + struct nlattr *tb[TCA_FLOWER_MAX + 1]; + struct fl_flow_mask mask = {}; + int err; + + if (!tca[TCA_OPTIONS]) + return -EINVAL; + + err = nla_parse_nested(tb, TCA_FLOWER_MAX, tca[TCA_OPTIONS], fl_policy); + if (err < 0) + return err; + + if (fold && handle && fold->handle != handle) + return -EINVAL; + + fnew = kzalloc(sizeof(*fnew), GFP_KERNEL); + if (!fnew) + return -ENOBUFS; + + tcf_exts_init(&fnew->exts, TCA_FLOWER_ACT, 0); + + if (!handle) { + handle = fl_grab_new_handle(tp, head); + if (!handle) { + err = -EINVAL; + goto errout; + } + } + fnew->handle = handle; + + err = fl_set_parms(net, tp, fnew, &mask, base, tb, tca[TCA_RATE], ovr); + if (err) + goto errout; + + err = fl_check_assign_mask(head, &mask); + if (err) + goto errout; + + err = rhashtable_insert_fast(&head->ht, &fnew->ht_node, + head->ht_params); + if (err) + goto errout; + if (fold) + rhashtable_remove_fast(&head->ht, &fold->ht_node, + head->ht_params); + + *arg = (unsigned long) fnew; + + if (fold) { + list_replace_rcu(&fnew->list, &fold->list); + tcf_unbind_filter(tp, &fold->res); + call_rcu(&fold->rcu, fl_destroy_filter); + } else { + list_add_tail_rcu(&fnew->list, &head->filters); + } 
+ + return 0; + +errout: + kfree(fnew); + return err; +} + +static int fl_delete(struct tcf_proto *tp, unsigned long arg) +{ + struct cls_fl_head *head = rtnl_dereference(tp->root); + struct cls_fl_filter *f = (struct cls_fl_filter *) arg; + + rhashtable_remove_fast(&head->ht, &f->ht_node, + head->ht_params); + list_del_rcu(&f->list); + tcf_unbind_filter(tp, &f->res); + call_rcu(&f->rcu, fl_destroy_filter); + return 0; +} + +static void fl_walk(struct tcf_proto *tp, struct tcf_walker *arg) +{ + struct cls_fl_head *head = rtnl_dereference(tp->root); + struct cls_fl_filter *f; + + list_for_each_entry_rcu(f, &head->filters, list) { + if (arg->count < arg->skip) + goto skip; + if (arg->fn(tp, (unsigned long) f, arg) < 0) { + arg->stop = 1; + break; + } +skip: + arg->count++; + } +} + +static int fl_dump_key_val(struct sk_buff *skb, + void *val, int val_type, + void *mask, int mask_type, int len) +{ + int err; + + if (!memchr_inv(mask, 0, len)) + return 0; + err = nla_put(skb, val_type, len, val); + if (err) + return err; + if (mask_type != TCA_FLOWER_UNSPEC) { + err = nla_put(skb, mask_type, len, mask); + if (err) + return err; + } + return 0; +} + +static int fl_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, + struct sk_buff *skb, struct tcmsg *t) +{ + struct cls_fl_head *head = rtnl_dereference(tp->root); + struct cls_fl_filter *f = (struct cls_fl_filter *) fh; + struct nlattr *nest; + struct fl_flow_key *key, *mask; + + if (!f) + return skb->len; + + t->tcm_handle = f->handle; + + nest = nla_nest_start(skb, TCA_OPTIONS); + if (!nest) + goto nla_put_failure; + + if (f->res.classid && + nla_put_u32(skb, TCA_FLOWER_CLASSID, f->res.classid)) + goto nla_put_failure; + + key = &f->key; + mask = &head->mask.key; + + if (mask->indev_ifindex) { + struct net_device *dev; + + dev = __dev_get_by_index(net, key->indev_ifindex); + if (dev && nla_put_string(skb, TCA_FLOWER_INDEV, dev->name)) + goto nla_put_failure; + } + + if (fl_dump_key_val(skb, key->eth.dst, TCA_FLOWER_KEY_ETH_DST, + mask->eth.dst, TCA_FLOWER_KEY_ETH_DST_MASK, + sizeof(key->eth.dst)) || + fl_dump_key_val(skb, key->eth.src, TCA_FLOWER_KEY_ETH_SRC, + mask->eth.src, TCA_FLOWER_KEY_ETH_SRC_MASK, + sizeof(key->eth.src)) || + fl_dump_key_val(skb, &key->basic.n_proto, TCA_FLOWER_KEY_ETH_TYPE, + &mask->basic.n_proto, TCA_FLOWER_UNSPEC, + sizeof(key->basic.n_proto))) + goto nla_put_failure; + if ((key->basic.n_proto == htons(ETH_P_IP) || + key->basic.n_proto == htons(ETH_P_IPV6)) && + fl_dump_key_val(skb, &key->basic.ip_proto, TCA_FLOWER_KEY_IP_PROTO, + &mask->basic.ip_proto, TCA_FLOWER_UNSPEC, + sizeof(key->basic.ip_proto))) + goto nla_put_failure; + + if (key->basic.n_proto == htons(ETH_P_IP) && + (fl_dump_key_val(skb, &key->ipv4.src, TCA_FLOWER_KEY_IPV4_SRC, + &mask->ipv4.src, TCA_FLOWER_KEY_IPV4_SRC_MASK, + sizeof(key->ipv4.src)) || + fl_dump_key_val(skb, &key->ipv4.dst, TCA_FLOWER_KEY_IPV4_DST, + &mask->ipv4.dst, TCA_FLOWER_KEY_IPV4_DST_MASK, + sizeof(key->ipv4.dst)))) + goto nla_put_failure; + else if (key->basic.n_proto == htons(ETH_P_IPV6) && + (fl_dump_key_val(skb, &key->ipv6.src, TCA_FLOWER_KEY_IPV6_SRC, + &mask->ipv6.src, TCA_FLOWER_KEY_IPV6_SRC_MASK, + sizeof(key->ipv6.src)) || + fl_dump_key_val(skb, &key->ipv6.dst, TCA_FLOWER_KEY_IPV6_DST, + &mask->ipv6.dst, TCA_FLOWER_KEY_IPV6_DST_MASK, + sizeof(key->ipv6.dst)))) + goto nla_put_failure; + + if (key->basic.ip_proto == IPPROTO_TCP && + (fl_dump_key_val(skb, &key->tp.src, TCA_FLOWER_KEY_TCP_SRC, + &mask->tp.src, TCA_FLOWER_UNSPEC, + sizeof(key->tp.src)) || + 
fl_dump_key_val(skb, &key->tp.dst, TCA_FLOWER_KEY_TCP_DST, + &mask->tp.dst, TCA_FLOWER_UNSPEC, + sizeof(key->tp.dst)))) + goto nla_put_failure; + else if (key->basic.ip_proto == IPPROTO_UDP && + (fl_dump_key_val(skb, &key->tp.src, TCA_FLOWER_KEY_UDP_SRC, + &mask->tp.src, TCA_FLOWER_UNSPEC, + sizeof(key->tp.src)) || + fl_dump_key_val(skb, &key->tp.dst, TCA_FLOWER_KEY_UDP_DST, + &mask->tp.dst, TCA_FLOWER_UNSPEC, + sizeof(key->tp.dst)))) + goto nla_put_failure; + + if (tcf_exts_dump(skb, &f->exts)) + goto nla_put_failure; + + nla_nest_end(skb, nest); + + if (tcf_exts_dump_stats(skb, &f->exts) < 0) + goto nla_put_failure; + + return skb->len; + +nla_put_failure: + nla_nest_cancel(skb, nest); + return -1; +} + +static struct tcf_proto_ops cls_fl_ops __read_mostly = { + .kind = "flower", + .classify = fl_classify, + .init = fl_init, + .destroy = fl_destroy, + .get = fl_get, + .change = fl_change, + .delete = fl_delete, + .walk = fl_walk, + .dump = fl_dump, + .owner = THIS_MODULE, +}; + +static int __init cls_fl_init(void) +{ + return register_tcf_proto_ops(&cls_fl_ops); +} + +static void __exit cls_fl_exit(void) +{ + unregister_tcf_proto_ops(&cls_fl_ops); +} + +module_init(cls_fl_init); +module_exit(cls_fl_exit); + +MODULE_AUTHOR("Jiri Pirko "); +MODULE_DESCRIPTION("Flower classifier"); +MODULE_LICENSE("GPL v2"); -- cgit v1.2.3 From a9b6391814d5d6b8668fca2dace86949b7244e2e Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Tue, 12 May 2015 11:56:50 -0400 Subject: packet: rollover statistics Rollover indicates exceptional conditions. Export a counter to inform socket owners of this state. If no socket with sufficient room is found, rollover fails. Also count these events. Finally, also count when flows are rolled over early thanks to huge flow detection, to validate its correctness. Tested: Read counters in bench_rollover on all other tests in the patchset Signed-off-by: Willem de Bruijn Signed-off-by: David S. 
Miller --- include/uapi/linux/if_packet.h | 7 +++++++ net/packet/af_packet.c | 19 ++++++++++++++++++- net/packet/internal.h | 3 +++ 3 files changed, 28 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_packet.h b/include/uapi/linux/if_packet.h index 053bd102fbe0..d3d715f8c88f 100644 --- a/include/uapi/linux/if_packet.h +++ b/include/uapi/linux/if_packet.h @@ -54,6 +54,7 @@ struct sockaddr_ll { #define PACKET_FANOUT 18 #define PACKET_TX_HAS_OFF 19 #define PACKET_QDISC_BYPASS 20 +#define PACKET_ROLLOVER_STATS 21 #define PACKET_FANOUT_HASH 0 #define PACKET_FANOUT_LB 1 @@ -75,6 +76,12 @@ struct tpacket_stats_v3 { unsigned int tp_freeze_q_cnt; }; +struct tpacket_rollover_stats { + __aligned_u64 tp_all; + __aligned_u64 tp_huge; + __aligned_u64 tp_failed; +}; + union tpacket_stats_u { struct tpacket_stats stats1; struct tpacket_stats_v3 stats3; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 8f0156b10f8d..31d58565726c 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1395,7 +1395,7 @@ static unsigned int fanout_demux_rollover(struct packet_fanout *f, unsigned int num) { struct packet_sock *po, *po_next; - unsigned int i, j, room; + unsigned int i, j, room = ROOM_NONE; po = pkt_sk(f->arr[idx]); @@ -1413,6 +1413,9 @@ static unsigned int fanout_demux_rollover(struct packet_fanout *f, packet_rcv_has_room(po_next, skb) == ROOM_NORMAL) { if (i != j) po->rollover->sock = i; + atomic_long_inc(&po->rollover->num); + if (room == ROOM_LOW) + atomic_long_inc(&po->rollover->num_huge); return i; } @@ -1420,6 +1423,7 @@ static unsigned int fanout_demux_rollover(struct packet_fanout *f, i = 0; } while (i != j); + atomic_long_inc(&po->rollover->num_failed); return idx; } @@ -1554,6 +1558,9 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags) po->rollover = kzalloc(sizeof(*po->rollover), GFP_KERNEL); if (!po->rollover) return -ENOMEM; + atomic_long_set(&po->rollover->num, 0); + atomic_long_set(&po->rollover->num_huge, 0); + atomic_long_set(&po->rollover->num_failed, 0); } mutex_lock(&fanout_mutex); @@ -3584,6 +3591,7 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, struct packet_sock *po = pkt_sk(sk); void *data = &val; union tpacket_stats_u st; + struct tpacket_rollover_stats rstats; if (level != SOL_PACKET) return -ENOPROTOOPT; @@ -3659,6 +3667,15 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, ((u32)po->fanout->flags << 24)) : 0); break; + case PACKET_ROLLOVER_STATS: + if (!po->rollover) + return -EINVAL; + rstats.tp_all = atomic_long_read(&po->rollover->num); + rstats.tp_huge = atomic_long_read(&po->rollover->num_huge); + rstats.tp_failed = atomic_long_read(&po->rollover->num_failed); + data = &rstats; + lv = sizeof(rstats); + break; case PACKET_TX_HAS_OFF: val = po->tp_tx_has_off; break; diff --git a/net/packet/internal.h b/net/packet/internal.h index a9d30a17c714..c035d263c1e8 100644 --- a/net/packet/internal.h +++ b/net/packet/internal.h @@ -89,6 +89,9 @@ struct packet_fanout { struct packet_rollover { int sock; + atomic_long_t num; + atomic_long_t num_huge; + atomic_long_t num_failed; #define ROLLOVER_HLEN (L1_CACHE_BYTES / sizeof(u32)) u32 history[ROLLOVER_HLEN] ____cacheline_aligned; } ____cacheline_aligned_in_smp; -- cgit v1.2.3 From 2d07dc79fe04a43d82a346ced6bbf07bdb523f1b Mon Sep 17 00:00:00 2001 From: "John W. 
Linville" Date: Wed, 13 May 2015 12:57:30 -0400 Subject: geneve: add initial netdev driver for GENEVE tunnels This is an initial implementation of a netdev driver for GENEVE tunnels. This implementation uses a fixed UDP port, and only supports point-to-point links with specific partner endpoints. Only IPv4 links are supported at this time. Signed-off-by: John W. Linville Signed-off-by: David S. Miller --- drivers/net/Kconfig | 14 ++ drivers/net/Makefile | 1 + drivers/net/geneve.c | 503 +++++++++++++++++++++++++++++++++++++++++++ include/uapi/linux/if_link.h | 9 + 4 files changed, 527 insertions(+) create mode 100644 drivers/net/geneve.c (limited to 'include/uapi/linux') diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index df51d6025a90..019fceffc9e5 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -179,6 +179,20 @@ config VXLAN To compile this driver as a module, choose M here: the module will be called vxlan. +config GENEVE + tristate "Generic Network Virtualization Encapsulation netdev" + depends on INET && GENEVE_CORE + select NET_IP_TUNNEL + ---help--- + This allows one to create geneve virtual interfaces that provide + Layer 2 Networks over Layer 3 Networks. GENEVE is often used + to tunnel virtual network infrastructure in virtualized environments. + For more information see: + http://tools.ietf.org/html/draft-gross-geneve-02 + + To compile this driver as a module, choose M here: the module + will be called geneve. + config NETCONSOLE tristate "Network console logging support" ---help--- diff --git a/drivers/net/Makefile b/drivers/net/Makefile index e25fdd7d905e..c12cb22478a7 100644 --- a/drivers/net/Makefile +++ b/drivers/net/Makefile @@ -23,6 +23,7 @@ obj-$(CONFIG_TUN) += tun.o obj-$(CONFIG_VETH) += veth.o obj-$(CONFIG_VIRTIO_NET) += virtio_net.o obj-$(CONFIG_VXLAN) += vxlan.o +obj-$(CONFIG_GENEVE) += geneve.o obj-$(CONFIG_NLMON) += nlmon.o # diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c new file mode 100644 index 000000000000..b7eafa4c1a67 --- /dev/null +++ b/drivers/net/geneve.c @@ -0,0 +1,503 @@ +/* + * GENEVE: Generic Network Virtualization Encapsulation + * + * Copyright (c) 2015 Red Hat, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include + +#define GENEVE_NETDEV_VER "0.6" + +#define GENEVE_UDP_PORT 6081 + +#define GENEVE_N_VID (1u << 24) +#define GENEVE_VID_MASK (GENEVE_N_VID - 1) + +#define VNI_HASH_BITS 10 +#define VNI_HASH_SIZE (1<rcv_data; + + /* Find the device for this VNI */ + hash = geneve_net_vni_hash(gnvh->vni); + vni_list_head = &gn->vni_list[hash]; + hlist_for_each_entry_rcu(dummy, vni_list_head, hlist) { + if (!memcmp(gnvh->vni, dummy->vni, sizeof(dummy->vni)) && + iph->saddr == dummy->remote.sin_addr.s_addr) { + geneve = dummy; + break; + } + } + if (!geneve) + goto drop; + + /* Drop packets w/ critical options, + * since we don't support any... 
+ */ + if (gnvh->critical) + goto drop; + + skb_reset_mac_header(skb); + skb_scrub_packet(skb, !net_eq(geneve->net, dev_net(geneve->dev))); + skb->protocol = eth_type_trans(skb, geneve->dev); + skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); + + /* Ignore packet loops (and multicast echo) */ + if (ether_addr_equal(eth_hdr(skb)->h_source, geneve->dev->dev_addr)) + goto drop; + + skb_reset_network_header(skb); + + iph = ip_hdr(skb); /* Now inner IP header... */ + err = IP_ECN_decapsulate(iph, skb); + + if (unlikely(err)) { + if (log_ecn_error) + net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n", + &iph->saddr, iph->tos); + if (err > 1) { + ++geneve->dev->stats.rx_frame_errors; + ++geneve->dev->stats.rx_errors; + goto drop; + } + } + + stats = this_cpu_ptr(geneve->dev->tstats); + u64_stats_update_begin(&stats->syncp); + stats->rx_packets++; + stats->rx_bytes += skb->len; + u64_stats_update_end(&stats->syncp); + + netif_rx(skb); + + return; +drop: + /* Consume bad packet */ + kfree_skb(skb); +} + +/* Setup stats when device is created */ +static int geneve_init(struct net_device *dev) +{ + dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); + if (!dev->tstats) + return -ENOMEM; + + return 0; +} + +static void geneve_uninit(struct net_device *dev) +{ + free_percpu(dev->tstats); +} + +static int geneve_open(struct net_device *dev) +{ + struct geneve_dev *geneve = netdev_priv(dev); + struct net *net = geneve->net; + struct geneve_net *gn = net_generic(geneve->net, geneve_net_id); + struct geneve_sock *gs; + + gs = geneve_sock_add(net, htons(GENEVE_UDP_PORT), geneve_rx, gn, + false, false); + if (IS_ERR(gs)) + return PTR_ERR(gs); + + geneve->sock = gs; + + return 0; +} + +static int geneve_stop(struct net_device *dev) +{ + struct geneve_dev *geneve = netdev_priv(dev); + struct geneve_sock *gs = geneve->sock; + + geneve_sock_release(gs); + + return 0; +} + +static netdev_tx_t geneve_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct geneve_dev *geneve = netdev_priv(dev); + struct geneve_sock *gs = geneve->sock; + struct rtable *rt = NULL; + const struct iphdr *iip; /* interior IP header */ + struct flowi4 fl4; + int err; + __be16 sport; + __u8 tos, ttl = 0; + + iip = ip_hdr(skb); + + skb_reset_mac_header(skb); + + /* TODO: port min/max limits should be configurable */ + sport = udp_flow_src_port(dev_net(dev), skb, 0, 0, true); + + memset(&fl4, 0, sizeof(fl4)); + fl4.daddr = geneve->remote.sin_addr.s_addr; + rt = ip_route_output_key(geneve->net, &fl4); + if (IS_ERR(rt)) { + netdev_dbg(dev, "no route to %pI4\n", &fl4.daddr); + dev->stats.tx_carrier_errors++; + goto tx_error; + } + if (rt->dst.dev == dev) { /* is this necessary? */ + netdev_dbg(dev, "circular route to %pI4\n", &fl4.daddr); + dev->stats.collisions++; + goto rt_tx_error; + } + + /* TODO: tos and ttl should be configurable */ + + tos = ip_tunnel_ecn_encap(0, iip, skb); + + if (IN_MULTICAST(ntohl(fl4.daddr))) + ttl = 1; + + ttl = ttl ? : ip4_dst_hoplimit(&rt->dst); + + /* no need to handle local destination and encap bypass...yet... 
*/ + + err = geneve_xmit_skb(gs, rt, skb, fl4.saddr, fl4.daddr, + tos, ttl, 0, sport, htons(GENEVE_UDP_PORT), 0, + geneve->vni, 0, NULL, false, + !net_eq(geneve->net, dev_net(geneve->dev))); + if (err < 0) + ip_rt_put(rt); + + iptunnel_xmit_stats(err, &dev->stats, dev->tstats); + + return NETDEV_TX_OK; + +rt_tx_error: + ip_rt_put(rt); +tx_error: + dev->stats.tx_errors++; + dev_kfree_skb(skb); + return NETDEV_TX_OK; +} + +static const struct net_device_ops geneve_netdev_ops = { + .ndo_init = geneve_init, + .ndo_uninit = geneve_uninit, + .ndo_open = geneve_open, + .ndo_stop = geneve_stop, + .ndo_start_xmit = geneve_xmit, + .ndo_get_stats64 = ip_tunnel_get_stats64, + .ndo_change_mtu = eth_change_mtu, + .ndo_validate_addr = eth_validate_addr, + .ndo_set_mac_address = eth_mac_addr, +}; + +static void geneve_get_drvinfo(struct net_device *dev, + struct ethtool_drvinfo *drvinfo) +{ + strlcpy(drvinfo->version, GENEVE_NETDEV_VER, sizeof(drvinfo->version)); + strlcpy(drvinfo->driver, "geneve", sizeof(drvinfo->driver)); +} + +static const struct ethtool_ops geneve_ethtool_ops = { + .get_drvinfo = geneve_get_drvinfo, + .get_link = ethtool_op_get_link, +}; + +/* Info for udev, that this is a virtual tunnel endpoint */ +static struct device_type geneve_type = { + .name = "geneve", +}; + +/* Initialize the device structure. */ +static void geneve_setup(struct net_device *dev) +{ + ether_setup(dev); + + dev->netdev_ops = &geneve_netdev_ops; + dev->ethtool_ops = &geneve_ethtool_ops; + dev->destructor = free_netdev; + + SET_NETDEV_DEVTYPE(dev, &geneve_type); + + dev->tx_queue_len = 0; + dev->features |= NETIF_F_LLTX; + dev->features |= NETIF_F_SG | NETIF_F_HW_CSUM; + dev->features |= NETIF_F_RXCSUM; + dev->features |= NETIF_F_GSO_SOFTWARE; + + dev->vlan_features = dev->features; + dev->features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX; + + dev->hw_features |= NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_RXCSUM; + dev->hw_features |= NETIF_F_GSO_SOFTWARE; + dev->hw_features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX; + + netif_keep_dst(dev); + dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; +} + +static const struct nla_policy geneve_policy[IFLA_GENEVE_MAX + 1] = { + [IFLA_GENEVE_ID] = { .type = NLA_U32 }, + [IFLA_GENEVE_REMOTE] = { .len = FIELD_SIZEOF(struct iphdr, daddr) }, +}; + +static int geneve_validate(struct nlattr *tb[], struct nlattr *data[]) +{ + if (tb[IFLA_ADDRESS]) { + if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) + return -EINVAL; + + if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS]))) + return -EADDRNOTAVAIL; + } + + if (!data) + return -EINVAL; + + if (data[IFLA_GENEVE_ID]) { + __u32 vni = nla_get_u32(data[IFLA_GENEVE_ID]); + + if (vni >= GENEVE_VID_MASK) + return -ERANGE; + } + + return 0; +} + +static int geneve_newlink(struct net *net, struct net_device *dev, + struct nlattr *tb[], struct nlattr *data[]) +{ + struct geneve_net *gn = net_generic(net, geneve_net_id); + struct geneve_dev *dummy, *geneve = netdev_priv(dev); + struct hlist_head *vni_list_head; + struct sockaddr_in remote; /* IPv4 address for link partner */ + __u32 vni, hash; + int err; + + if (!data[IFLA_GENEVE_ID] || !data[IFLA_GENEVE_REMOTE]) + return -EINVAL; + + geneve->net = net; + geneve->dev = dev; + + vni = nla_get_u32(data[IFLA_GENEVE_ID]); + geneve->vni[0] = (vni & 0x00ff0000) >> 16; + geneve->vni[1] = (vni & 0x0000ff00) >> 8; + geneve->vni[2] = vni & 0x000000ff; + + geneve->remote.sin_addr.s_addr = + nla_get_in_addr(data[IFLA_GENEVE_REMOTE]); + if (IN_MULTICAST(ntohl(geneve->remote.sin_addr.s_addr))) + 
return -EINVAL; + + remote = geneve->remote; + hash = geneve_net_vni_hash(geneve->vni); + vni_list_head = &gn->vni_list[hash]; + hlist_for_each_entry_rcu(dummy, vni_list_head, hlist) { + if (!memcmp(geneve->vni, dummy->vni, sizeof(dummy->vni)) && + !memcmp(&remote, &dummy->remote, sizeof(dummy->remote))) + return -EBUSY; + } + + if (tb[IFLA_ADDRESS] == NULL) + eth_hw_addr_random(dev); + + err = register_netdevice(dev); + if (err) + return err; + + list_add(&geneve->next, &gn->geneve_list); + + hlist_add_head_rcu(&geneve->hlist, &gn->vni_list[hash]); + + return 0; +} + +static void geneve_dellink(struct net_device *dev, struct list_head *head) +{ + struct geneve_dev *geneve = netdev_priv(dev); + + if (!hlist_unhashed(&geneve->hlist)) + hlist_del_rcu(&geneve->hlist); + + list_del(&geneve->next); + unregister_netdevice_queue(dev, head); +} + +static size_t geneve_get_size(const struct net_device *dev) +{ + return nla_total_size(sizeof(__u32)) + /* IFLA_GENEVE_ID */ + nla_total_size(sizeof(struct in_addr)) + /* IFLA_GENEVE_REMOTE */ + 0; +} + +static int geneve_fill_info(struct sk_buff *skb, const struct net_device *dev) +{ + struct geneve_dev *geneve = netdev_priv(dev); + __u32 vni; + + vni = (geneve->vni[0] << 16) | (geneve->vni[1] << 8) | geneve->vni[2]; + if (nla_put_u32(skb, IFLA_GENEVE_ID, vni)) + goto nla_put_failure; + + if (nla_put_in_addr(skb, IFLA_GENEVE_REMOTE, + geneve->remote.sin_addr.s_addr)) + goto nla_put_failure; + + return 0; + +nla_put_failure: + return -EMSGSIZE; +} + +static struct rtnl_link_ops geneve_link_ops __read_mostly = { + .kind = "geneve", + .maxtype = IFLA_GENEVE_MAX, + .policy = geneve_policy, + .priv_size = sizeof(struct geneve_dev), + .setup = geneve_setup, + .validate = geneve_validate, + .newlink = geneve_newlink, + .dellink = geneve_dellink, + .get_size = geneve_get_size, + .fill_info = geneve_fill_info, +}; + +static __net_init int geneve_init_net(struct net *net) +{ + struct geneve_net *gn = net_generic(net, geneve_net_id); + unsigned int h; + + INIT_LIST_HEAD(&gn->geneve_list); + + for (h = 0; h < VNI_HASH_SIZE; ++h) + INIT_HLIST_HEAD(&gn->vni_list[h]); + + return 0; +} + +static void __net_exit geneve_exit_net(struct net *net) +{ + struct geneve_net *gn = net_generic(net, geneve_net_id); + struct geneve_dev *geneve, *next; + struct net_device *dev, *aux; + LIST_HEAD(list); + + rtnl_lock(); + + /* gather any geneve devices that were moved into this ns */ + for_each_netdev_safe(net, dev, aux) + if (dev->rtnl_link_ops == &geneve_link_ops) + unregister_netdevice_queue(dev, &list); + + /* now gather any other geneve devices that were created in this ns */ + list_for_each_entry_safe(geneve, next, &gn->geneve_list, next) { + /* If geneve->dev is in the same netns, it was already added + * to the list by the previous loop. 
+ */ + if (!net_eq(dev_net(geneve->dev), net)) + unregister_netdevice_queue(geneve->dev, &list); + } + + /* unregister the devices gathered above */ + unregister_netdevice_many(&list); + rtnl_unlock(); +} + +static struct pernet_operations geneve_net_ops = { + .init = geneve_init_net, + .exit = geneve_exit_net, + .id = &geneve_net_id, + .size = sizeof(struct geneve_net), +}; + +static int __init geneve_init_module(void) +{ + int rc; + + rc = register_pernet_subsys(&geneve_net_ops); + if (rc) + goto out1; + + rc = rtnl_link_register(&geneve_link_ops); + if (rc) + goto out2; + + return 0; +out2: + unregister_pernet_subsys(&geneve_net_ops); +out1: + return rc; +} +late_initcall(geneve_init_module); + +static void __exit geneve_cleanup_module(void) +{ + rtnl_link_unregister(&geneve_link_ops); + unregister_pernet_subsys(&geneve_net_ops); +} +module_exit(geneve_cleanup_module); + +MODULE_LICENSE("GPL"); +MODULE_VERSION(GENEVE_NETDEV_VER); +MODULE_AUTHOR("John W. Linville "); +MODULE_DESCRIPTION("Interface driver for GENEVE encapsulated traffic"); +MODULE_ALIAS_RTNL_LINK("geneve"); diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 6d6e502e1051..afccc9393fef 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -390,6 +390,15 @@ struct ifla_vxlan_port_range { __be16 high; }; +/* GENEVE section */ +enum { + IFLA_GENEVE_UNSPEC, + IFLA_GENEVE_ID, + IFLA_GENEVE_REMOTE, + __IFLA_GENEVE_MAX +}; +#define IFLA_GENEVE_MAX (__IFLA_GENEVE_MAX - 1) + /* Bonding section */ enum { -- cgit v1.2.3 From e687ad60af09010936bbd0b2a3b5d90a8ee8353c Mon Sep 17 00:00:00 2001 From: Pablo Neira Date: Wed, 13 May 2015 18:19:38 +0200 Subject: netfilter: add netfilter ingress hook after handle_ing() under unique static key This patch adds the Netfilter ingress hook just after the existing tc ingress hook, that seems to be the consensus solution for this. Note that the Netfilter hook resides under the global static key that enables ingress filtering. Nonetheless, Netfilter still also has its own static key for minimal impact on the existing handle_ing(). 
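Module-side sketch, not part of this patch: what a minimal per-device ingress hook could look like once NFPROTO_NETDEV/NF_NETDEV_INGRESS exist. The hook prototype and the nf_register_hook()/nf_unregister_hook() calls follow this kernel generation's netfilter API; treat the exact signatures as an assumption and check them against the tree being built.

static unsigned int my_ingress_hook(const struct nf_hook_ops *ops,
				    struct sk_buff *skb,
				    const struct nf_hook_state *state)
{
	/* inspect or mangle skb here; this runs right after tc ingress */
	return NF_ACCEPT;
}

static struct nf_hook_ops my_ingress_ops = {
	.hook		= my_ingress_hook,
	.pf		= NFPROTO_NETDEV,
	.hooknum	= NF_NETDEV_INGRESS,
	.priority	= 0,
	/* .dev must be set to the target net_device before registering:
	 *	my_ingress_ops.dev = dev;
	 *	err = nf_register_hook(&my_ingress_ops);
	 *	...
	 *	nf_unregister_hook(&my_ingress_ops);
	 */
};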
* Without this patch: Result: OK: 6216490(c6216338+d152) usec, 100000000 (60byte,0frags) 16086246pps 7721Mb/sec (7721398080bps) errors: 100000000 42.46% kpktgend_0 [kernel.kallsyms] [k] __netif_receive_skb_core 25.92% kpktgend_0 [kernel.kallsyms] [k] kfree_skb 7.81% kpktgend_0 [pktgen] [k] pktgen_thread_worker 5.62% kpktgend_0 [kernel.kallsyms] [k] ip_rcv 2.70% kpktgend_0 [kernel.kallsyms] [k] netif_receive_skb_internal 2.34% kpktgend_0 [kernel.kallsyms] [k] netif_receive_skb_sk 1.44% kpktgend_0 [kernel.kallsyms] [k] __build_skb * With this patch: Result: OK: 6214833(c6214731+d101) usec, 100000000 (60byte,0frags) 16090536pps 7723Mb/sec (7723457280bps) errors: 100000000 41.23% kpktgend_0 [kernel.kallsyms] [k] __netif_receive_skb_core 26.57% kpktgend_0 [kernel.kallsyms] [k] kfree_skb 7.72% kpktgend_0 [pktgen] [k] pktgen_thread_worker 5.55% kpktgend_0 [kernel.kallsyms] [k] ip_rcv 2.78% kpktgend_0 [kernel.kallsyms] [k] netif_receive_skb_internal 2.06% kpktgend_0 [kernel.kallsyms] [k] netif_receive_skb_sk 1.43% kpktgend_0 [kernel.kallsyms] [k] __build_skb * Without this patch + tc ingress: tc filter add dev eth4 parent ffff: protocol ip prio 1 \ u32 match ip dst 4.3.2.1/32 Result: OK: 9269001(c9268821+d179) usec, 100000000 (60byte,0frags) 10788648pps 5178Mb/sec (5178551040bps) errors: 100000000 40.99% kpktgend_0 [kernel.kallsyms] [k] __netif_receive_skb_core 17.50% kpktgend_0 [kernel.kallsyms] [k] kfree_skb 11.77% kpktgend_0 [cls_u32] [k] u32_classify 5.62% kpktgend_0 [kernel.kallsyms] [k] tc_classify_compat 5.18% kpktgend_0 [pktgen] [k] pktgen_thread_worker 3.23% kpktgend_0 [kernel.kallsyms] [k] tc_classify 2.97% kpktgend_0 [kernel.kallsyms] [k] ip_rcv 1.83% kpktgend_0 [kernel.kallsyms] [k] netif_receive_skb_internal 1.50% kpktgend_0 [kernel.kallsyms] [k] netif_receive_skb_sk 0.99% kpktgend_0 [kernel.kallsyms] [k] __build_skb * With this patch + tc ingress: tc filter add dev eth4 parent ffff: protocol ip prio 1 \ u32 match ip dst 4.3.2.1/32 Result: OK: 9308218(c9308091+d126) usec, 100000000 (60byte,0frags) 10743194pps 5156Mb/sec (5156733120bps) errors: 100000000 42.01% kpktgend_0 [kernel.kallsyms] [k] __netif_receive_skb_core 17.78% kpktgend_0 [kernel.kallsyms] [k] kfree_skb 11.70% kpktgend_0 [cls_u32] [k] u32_classify 5.46% kpktgend_0 [kernel.kallsyms] [k] tc_classify_compat 5.16% kpktgend_0 [pktgen] [k] pktgen_thread_worker 2.98% kpktgend_0 [kernel.kallsyms] [k] ip_rcv 2.84% kpktgend_0 [kernel.kallsyms] [k] tc_classify 1.96% kpktgend_0 [kernel.kallsyms] [k] netif_receive_skb_internal 1.57% kpktgend_0 [kernel.kallsyms] [k] netif_receive_skb_sk Note that the results are very similar before and after. I can see gcc gets the code under the ingress static key out of the hot path. Then, on that cold branch, it generates the code to accomodate the netfilter ingress static key. My explanation for this is that this reduces the pressure on the instruction cache for non-users as the new code is out of the hot path, and it comes with minimal impact for tc ingress users. Using gcc version 4.8.4 on: Architecture: x86_64 CPU op-mode(s): 32-bit, 64-bit Byte Order: Little Endian CPU(s): 8 [...] L1d cache: 16K L1i cache: 64K L2 cache: 2048K L3 cache: 8192K Signed-off-by: Pablo Neira Ayuso Acked-by: Alexei Starovoitov Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 3 +++ include/linux/netfilter.h | 1 + include/linux/netfilter_ingress.h | 41 +++++++++++++++++++++++++++++++++++++++ include/uapi/linux/netfilter.h | 6 ++++++ net/core/dev.c | 36 ++++++++++++++++++++++++++++++++++ net/netfilter/Kconfig | 7 +++++++ net/netfilter/core.c | 31 ++++++++++++++++++++++++++++- 7 files changed, 124 insertions(+), 1 deletion(-) create mode 100644 include/linux/netfilter_ingress.h (limited to 'include/uapi/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index d3ed01c18247..51f8d2f5dc3f 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1656,6 +1656,9 @@ struct net_device { struct tcf_proto __rcu *ingress_cl_list; #endif struct netdev_queue __rcu *ingress_queue; +#ifdef CONFIG_NETFILTER_INGRESS + struct list_head nf_hooks_ingress; +#endif unsigned char broadcast[MAX_ADDR_LEN]; #ifdef CONFIG_RFS_ACCEL diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 49d00638d1fa..f5ff5d156da8 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -86,6 +86,7 @@ struct nf_hook_ops { /* User fills in from here down. */ nf_hookfn *hook; + struct net_device *dev; struct module *owner; void *priv; u_int8_t pf; diff --git a/include/linux/netfilter_ingress.h b/include/linux/netfilter_ingress.h new file mode 100644 index 000000000000..cb0727fe2b3d --- /dev/null +++ b/include/linux/netfilter_ingress.h @@ -0,0 +1,41 @@ +#ifndef _NETFILTER_INGRESS_H_ +#define _NETFILTER_INGRESS_H_ + +#include +#include + +#ifdef CONFIG_NETFILTER_INGRESS +static inline int nf_hook_ingress_active(struct sk_buff *skb) +{ + return nf_hook_list_active(&skb->dev->nf_hooks_ingress, + NFPROTO_NETDEV, NF_NETDEV_INGRESS); +} + +static inline int nf_hook_ingress(struct sk_buff *skb) +{ + struct nf_hook_state state; + + nf_hook_state_init(&state, &skb->dev->nf_hooks_ingress, + NF_NETDEV_INGRESS, INT_MIN, NFPROTO_NETDEV, NULL, + skb->dev, NULL, NULL); + return nf_hook_slow(skb, &state); +} + +static inline void nf_hook_ingress_init(struct net_device *dev) +{ + INIT_LIST_HEAD(&dev->nf_hooks_ingress); +} +#else /* CONFIG_NETFILTER_INGRESS */ +static inline int nf_hook_ingress_active(struct sk_buff *skb) +{ + return 0; +} + +static inline int nf_hook_ingress(struct sk_buff *skb) +{ + return 0; +} + +static inline void nf_hook_ingress_init(struct net_device *dev) {} +#endif /* CONFIG_NETFILTER_INGRESS */ +#endif /* _NETFILTER_INGRESS_H_ */ diff --git a/include/uapi/linux/netfilter.h b/include/uapi/linux/netfilter.h index ef1b1f88ca18..177027cce6b3 100644 --- a/include/uapi/linux/netfilter.h +++ b/include/uapi/linux/netfilter.h @@ -51,11 +51,17 @@ enum nf_inet_hooks { NF_INET_NUMHOOKS }; +enum nf_dev_hooks { + NF_NETDEV_INGRESS, + NF_NETDEV_NUMHOOKS +}; + enum { NFPROTO_UNSPEC = 0, NFPROTO_INET = 1, NFPROTO_IPV4 = 2, NFPROTO_ARP = 3, + NFPROTO_NETDEV = 5, NFPROTO_BRIDGE = 7, NFPROTO_IPV6 = 10, NFPROTO_DECNET = 12, diff --git a/net/core/dev.c b/net/core/dev.c index a5ef90016ce7..29f0d6e6542c 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -135,6 +135,7 @@ #include #include #include +#include #include "net-sysfs.h" @@ -3666,6 +3667,13 @@ static inline struct sk_buff *handle_ing(struct sk_buff *skb, return skb; } +#else +static inline struct sk_buff *handle_ing(struct sk_buff *skb, + struct packet_type **pt_prev, + int *ret, struct net_device *orig_dev) +{ + return skb; +} #endif /** @@ -3739,6 +3747,28 @@ static bool skb_pfmemalloc_protocol(struct sk_buff *skb) } } +#ifdef CONFIG_NETFILTER_INGRESS 
+static inline int nf_ingress(struct sk_buff *skb, struct packet_type **pt_prev, + int *ret, struct net_device *orig_dev) +{ + if (nf_hook_ingress_active(skb)) { + if (*pt_prev) { + *ret = deliver_skb(skb, *pt_prev, orig_dev); + *pt_prev = NULL; + } + + return nf_hook_ingress(skb); + } + return 0; +} +#else +static inline int nf_ingress(struct sk_buff *skb, struct packet_type **pt_prev, + int *ret, struct net_device *orig_dev) +{ + return 0; +} +#endif + static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc) { struct packet_type *ptype, *pt_prev; @@ -3803,6 +3833,9 @@ skip_taps: skb = handle_ing(skb, &pt_prev, &ret, orig_dev); if (!skb) goto unlock; + + if (nf_ingress(skb, &pt_prev, &ret, orig_dev) < 0) + goto unlock; } #endif #ifdef CONFIG_NET_CLS_ACT @@ -6968,6 +7001,9 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, dev->group = INIT_NETDEV_GROUP; if (!dev->ethtool_ops) dev->ethtool_ops = &default_ethtool_ops; + + nf_hook_ingress_init(dev); + return dev; free_all: diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index f70e34a68f70..db1c674397ad 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -1,6 +1,13 @@ menu "Core Netfilter Configuration" depends on NET && INET && NETFILTER +config NETFILTER_INGRESS + bool "Netfilter ingress support" + select NET_INGRESS + help + This allows you to classify packets from ingress using the Netfilter + infrastructure. + config NETFILTER_NETLINK tristate diff --git a/net/netfilter/core.c b/net/netfilter/core.c index e418cfd603c0..653e32eac08c 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -64,10 +64,27 @@ static DEFINE_MUTEX(nf_hook_mutex); int nf_register_hook(struct nf_hook_ops *reg) { + struct list_head *nf_hook_list; struct nf_hook_ops *elem; mutex_lock(&nf_hook_mutex); - list_for_each_entry(elem, &nf_hooks[reg->pf][reg->hooknum], list) { + switch (reg->pf) { + case NFPROTO_NETDEV: +#ifdef CONFIG_NETFILTER_INGRESS + if (reg->hooknum == NF_NETDEV_INGRESS) { + BUG_ON(reg->dev == NULL); + nf_hook_list = ®->dev->nf_hooks_ingress; + net_inc_ingress_queue(); + break; + } +#endif + /* Fall through. */ + default: + nf_hook_list = &nf_hooks[reg->pf][reg->hooknum]; + break; + } + + list_for_each_entry(elem, nf_hook_list, list) { if (reg->priority < elem->priority) break; } @@ -85,6 +102,18 @@ void nf_unregister_hook(struct nf_hook_ops *reg) mutex_lock(&nf_hook_mutex); list_del_rcu(®->list); mutex_unlock(&nf_hook_mutex); + switch (reg->pf) { + case NFPROTO_NETDEV: +#ifdef CONFIG_NETFILTER_INGRESS + if (reg->hooknum == NF_NETDEV_INGRESS) { + net_dec_ingress_queue(); + break; + } + break; +#endif + default: + break; + } #ifdef HAVE_JUMP_LABEL static_key_slow_dec(&nf_hooks_needed[reg->pf][reg->hooknum]); #endif -- cgit v1.2.3 From 4c12adad26f059fa207d6b07aa61f39bc459211b Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 9 Apr 2015 16:36:49 -0300 Subject: [media] dvb: Document FE_SCALE_DECIBEL units consistently In comments and in the documentation, the units of properties marked with the FE_SCALE_DECIBEL scale are specified in terms of 1/1000 dB or 0.0001 dB. This is inconsistent, however, as 1/1000 is 0.001, not 0.0001. Note that the v4l-utils divide the value by 1000 for the signal strength suggesting that the 1/1000 is correct. Settle on millidecibels, ie. 1/1000dB or 0.001dB. 
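On the user-space side the conversion is now uniform: an FE_SCALE_DECIBEL value is divided by 1000 to obtain dB (or dBm for signal strength). A minimal, hypothetical sketch of that interpretation, assuming the dtv_stats entry has already been fetched through FE_GET_PROPERTY (that plumbing is omitted here):

#include <stdio.h>
#include <linux/dvb/frontend.h>

/* Print one statistics entry using the millidecibel convention settled by
 * this patch; v4l-utils performs the same division by 1000. */
static void print_stat(const char *name, const struct dtv_stats *st)
{
	switch (st->scale) {
	case FE_SCALE_DECIBEL:
		/* svalue counts 0.001 dB steps, typically negative for dBm */
		printf("%s: %.3f dB\n", name, st->svalue / 1000.0);
		break;
	case FE_SCALE_RELATIVE:
		/* uvalue ranges from 0 (0%) to 65535 (100%) */
		printf("%s: %.1f %%\n", name, st->uvalue * 100.0 / 65535.0);
		break;
	default:
		/* FE_SCALE_NOT_AVAILABLE, or a counter scale not handled here */
		printf("%s: not available\n", name);
	}
}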
Signed-off-by: David Howells Signed-off-by: Mauro Carvalho Chehab --- Documentation/DocBook/media/dvb/dvbproperty.xml | 4 ++-- include/uapi/linux/dvb/frontend.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/uapi/linux') diff --git a/Documentation/DocBook/media/dvb/dvbproperty.xml b/Documentation/DocBook/media/dvb/dvbproperty.xml index 3018564ddfd9..7ddab2ba9b40 100644 --- a/Documentation/DocBook/media/dvb/dvbproperty.xml +++ b/Documentation/DocBook/media/dvb/dvbproperty.xml @@ -953,7 +953,7 @@ enum fe_interleaving { Possible scales for this metric are: FE_SCALE_NOT_AVAILABLE - it failed to measure it, or the measurement was not complete yet. - FE_SCALE_DECIBEL - signal strength is in 0.0001 dBm units, power measured in miliwatts. This value is generally negative. + FE_SCALE_DECIBEL - signal strength is in 0.001 dBm units, power measured in miliwatts. This value is generally negative. FE_SCALE_RELATIVE - The frontend provides a 0% to 100% measurement for power (actually, 0 to 65535). @@ -963,7 +963,7 @@ enum fe_interleaving { Possible scales for this metric are: FE_SCALE_NOT_AVAILABLE - it failed to measure it, or the measurement was not complete yet. - FE_SCALE_DECIBEL - Signal/Noise ratio is in 0.0001 dB units. + FE_SCALE_DECIBEL - Signal/Noise ratio is in 0.001 dB units. FE_SCALE_RELATIVE - The frontend provides a 0% to 100% measurement for Signal/Noise (actually, 0 to 65535). diff --git a/include/uapi/linux/dvb/frontend.h b/include/uapi/linux/dvb/frontend.h index c56d77c496a5..466f56997272 100644 --- a/include/uapi/linux/dvb/frontend.h +++ b/include/uapi/linux/dvb/frontend.h @@ -467,7 +467,7 @@ struct dtv_cmds_h { * @FE_SCALE_NOT_AVAILABLE: That QoS measure is not available. That * could indicate a temporary or a permanent * condition. - * @FE_SCALE_DECIBEL: The scale is measured in 0.0001 dB steps, typically + * @FE_SCALE_DECIBEL: The scale is measured in 0.001 dB steps, typically * used on signal measures. * @FE_SCALE_RELATIVE: The scale is a relative percentual measure, * ranging from 0 (0%) to 0xffff (100%). 
@@ -516,7 +516,7 @@ struct dtv_stats { __u8 scale; /* enum fecap_scale_params type */ union { __u64 uvalue; /* for counters and relative scales */ - __s64 svalue; /* for 0.0001 dB measures */ + __s64 svalue; /* for 0.001 dB measures */ }; } __attribute__ ((packed)); -- cgit v1.2.3 From 29a5d3eb9a7612b26ba098a0db65e54372612d07 Mon Sep 17 00:00:00 2001 From: Andrew Lewycky Date: Sun, 7 Dec 2014 17:05:11 +0200 Subject: drm/amdkfd: add events IOCTL set definitions - AMDKFD_IOC_CREATE_EVENT: Creates a new event of a specified type - AMDKFD_IOC_DESTROY_EVENT: Destroys an existing event - AMDKFD_IOC_SET_EVENT: Signal an existing event - AMDKFD_IOC_RESET_EVENT: Reset an existing event - AMDKFD_IOC_WAIT_EVENTS: Wait on event(s) until they are signaled v2: - Move the limit of the signal events to kfd_ioctl.h so it can be used by userspace v3: - Change all bool fields in struct kfd_memory_exception_failure to uint32_t Signed-off-by: Andrew Lewycky Signed-off-by: Oded Gabbay --- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 45 +++++++++++++++ include/uapi/linux/kfd_ioctl.h | 96 +++++++++++++++++++++++++++++++- 2 files changed, 139 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 19a4fba46e4e..9933b2efe5dd 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -514,6 +514,36 @@ static int kfd_ioctl_get_process_apertures(struct file *filp, return 0; } +static int kfd_ioctl_create_event(struct file *filp, struct kfd_process *p, + void *data) +{ + return -ENODEV; +} + +static int kfd_ioctl_destroy_event(struct file *filp, struct kfd_process *p, + void *data) +{ + return -ENODEV; +} + +static int kfd_ioctl_set_event(struct file *filp, struct kfd_process *p, + void *data) +{ + return -ENODEV; +} + +static int kfd_ioctl_reset_event(struct file *filp, struct kfd_process *p, + void *data) +{ + return -ENODEV; +} + +static int kfd_ioctl_wait_events(struct file *filp, struct kfd_process *p, + void *data) +{ + return -ENODEV; +} + #define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \ [_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, .cmd_drv = 0, .name = #ioctl} @@ -539,6 +569,21 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = { AMDKFD_IOCTL_DEF(AMDKFD_IOC_UPDATE_QUEUE, kfd_ioctl_update_queue, 0), + + AMDKFD_IOCTL_DEF(AMDKFD_IOC_CREATE_EVENT, + kfd_ioctl_create_event, 0), + + AMDKFD_IOCTL_DEF(AMDKFD_IOC_DESTROY_EVENT, + kfd_ioctl_destroy_event, 0), + + AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_EVENT, + kfd_ioctl_set_event, 0), + + AMDKFD_IOCTL_DEF(AMDKFD_IOC_RESET_EVENT, + kfd_ioctl_reset_event, 0), + + AMDKFD_IOCTL_DEF(AMDKFD_IOC_WAIT_EVENTS, + kfd_ioctl_wait_events, 0), }; #define AMDKFD_CORE_IOCTL_COUNT ARRAY_SIZE(amdkfd_ioctls) diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index af94f31e33ac..4ca35a8f9891 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -27,7 +27,7 @@ #include #define KFD_IOCTL_MAJOR_VERSION 1 -#define KFD_IOCTL_MINOR_VERSION 0 +#define KFD_IOCTL_MINOR_VERSION 1 struct kfd_ioctl_get_version_args { uint32_t major_version; /* from KFD */ @@ -128,6 +128,83 @@ struct kfd_ioctl_get_process_apertures_args { uint32_t pad; }; +/* Matching HSA_EVENTTYPE */ +#define KFD_IOC_EVENT_SIGNAL 0 +#define KFD_IOC_EVENT_NODECHANGE 1 +#define KFD_IOC_EVENT_DEVICESTATECHANGE 2 +#define KFD_IOC_EVENT_HW_EXCEPTION 3 +#define KFD_IOC_EVENT_SYSTEM_EVENT 4 +#define 
KFD_IOC_EVENT_DEBUG_EVENT 5 +#define KFD_IOC_EVENT_PROFILE_EVENT 6 +#define KFD_IOC_EVENT_QUEUE_EVENT 7 +#define KFD_IOC_EVENT_MEMORY 8 + +#define KFD_IOC_WAIT_RESULT_COMPLETE 0 +#define KFD_IOC_WAIT_RESULT_TIMEOUT 1 +#define KFD_IOC_WAIT_RESULT_FAIL 2 + +#define KFD_SIGNAL_EVENT_LIMIT 256 + +struct kfd_ioctl_create_event_args { + uint64_t event_page_offset; /* from KFD */ + uint32_t event_trigger_data; /* from KFD - signal events only */ + uint32_t event_type; /* to KFD */ + uint32_t auto_reset; /* to KFD */ + uint32_t node_id; /* to KFD - only valid for certain + event types */ + uint32_t event_id; /* from KFD */ + uint32_t event_slot_index; /* from KFD */ +}; + +struct kfd_ioctl_destroy_event_args { + uint32_t event_id; /* to KFD */ + uint32_t pad; +}; + +struct kfd_ioctl_set_event_args { + uint32_t event_id; /* to KFD */ + uint32_t pad; +}; + +struct kfd_ioctl_reset_event_args { + uint32_t event_id; /* to KFD */ + uint32_t pad; +}; + +struct kfd_memory_exception_failure { + uint32_t NotPresent; /* Page not present or supervisor privilege */ + uint32_t ReadOnly; /* Write access to a read-only page */ + uint32_t NoExecute; /* Execute access to a page marked NX */ + uint32_t pad; +}; + +/* memory exception data*/ +struct kfd_hsa_memory_exception_data { + struct kfd_memory_exception_failure failure; + uint64_t va; + uint32_t gpu_id; + uint32_t pad; +}; + +/* Event data*/ +struct kfd_event_data { + union { + struct kfd_hsa_memory_exception_data memory_exception_data; + }; /* From KFD */ + uint64_t kfd_event_data_ext; /* pointer to an extension structure + for future exception types */ + uint32_t event_id; /* to KFD */ + uint32_t pad; +}; + +struct kfd_ioctl_wait_events_args { + uint64_t events_ptr; /* to KFD */ + uint32_t num_events; /* to KFD */ + uint32_t wait_for_all; /* to KFD */ + uint32_t timeout; /* to KFD */ + uint32_t wait_result; /* from KFD */ +}; + #define AMDKFD_IOCTL_BASE 'K' #define AMDKFD_IO(nr) _IO(AMDKFD_IOCTL_BASE, nr) #define AMDKFD_IOR(nr, type) _IOR(AMDKFD_IOCTL_BASE, nr, type) @@ -155,7 +232,22 @@ struct kfd_ioctl_get_process_apertures_args { #define AMDKFD_IOC_UPDATE_QUEUE \ AMDKFD_IOW(0x07, struct kfd_ioctl_update_queue_args) +#define AMDKFD_IOC_CREATE_EVENT \ + AMDKFD_IOWR(0x08, struct kfd_ioctl_create_event_args) + +#define AMDKFD_IOC_DESTROY_EVENT \ + AMDKFD_IOW(0x09, struct kfd_ioctl_destroy_event_args) + +#define AMDKFD_IOC_SET_EVENT \ + AMDKFD_IOW(0x0A, struct kfd_ioctl_set_event_args) + +#define AMDKFD_IOC_RESET_EVENT \ + AMDKFD_IOW(0x0B, struct kfd_ioctl_reset_event_args) + +#define AMDKFD_IOC_WAIT_EVENTS \ + AMDKFD_IOWR(0x0C, struct kfd_ioctl_wait_events_args) + #define AMDKFD_COMMAND_START 0x01 -#define AMDKFD_COMMAND_END 0x08 +#define AMDKFD_COMMAND_END 0x0D #endif -- cgit v1.2.3 From c9a70d43461d83818825ae065bb8fc887421e150 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Mon, 18 May 2015 16:31:47 +0530 Subject: net-next: ethtool: Added port speed macros. Signed-off-by: Parav Pandit Signed-off-by: David S. Miller --- include/uapi/linux/ethtool.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index 2e49fc880d29..ae832b45b44c 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -1264,15 +1264,19 @@ enum ethtool_sfeatures_retval_bits { * it was forced up into this mode or autonegotiated. */ -/* The forced speed, 10Mb, 100Mb, gigabit, [2.5|10|20|40|56]GbE. 
*/ +/* The forced speed, 10Mb, 100Mb, gigabit, [2.5|5|10|20|25|40|50|56|100]GbE. */ #define SPEED_10 10 #define SPEED_100 100 #define SPEED_1000 1000 #define SPEED_2500 2500 +#define SPEED_5000 5000 #define SPEED_10000 10000 #define SPEED_20000 20000 +#define SPEED_25000 25000 #define SPEED_40000 40000 +#define SPEED_50000 50000 #define SPEED_56000 56000 +#define SPEED_100000 100000 #define SPEED_UNKNOWN -1 -- cgit v1.2.3 From 928f81aa619d845d6faa5a459cdbc18b7a78ddce Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Wed, 20 May 2015 09:01:20 -0700 Subject: ARM: OMAP1: Move UART defines to prepare for sparse IRQ These have been indirectly included via asm/irqs.h that has included mach/hardware.h unless SPARSE_IRQ is specified. Let's move them to where the other OMAP serial defines for 8250 are. Cc: Aaro Koskinen Cc: Greg Kroah-Hartman Signed-off-by: Tony Lindgren --- arch/arm/mach-omap1/include/mach/serial.h | 5 ----- include/uapi/linux/serial_reg.h | 3 +++ 2 files changed, 3 insertions(+), 5 deletions(-) (limited to 'include/uapi/linux') diff --git a/arch/arm/mach-omap1/include/mach/serial.h b/arch/arm/mach-omap1/include/mach/serial.h index 2ce6a2db470b..4700e384c3d9 100644 --- a/arch/arm/mach-omap1/include/mach/serial.h +++ b/arch/arm/mach-omap1/include/mach/serial.h @@ -27,11 +27,6 @@ */ #define OMAP_UART_INFO_OFS 0x3ffc -/* OMAP1 serial ports */ -#define OMAP1_UART1_BASE 0xfffb0000 -#define OMAP1_UART2_BASE 0xfffb0800 -#define OMAP1_UART3_BASE 0xfffb9800 - #define OMAP_PORT_SHIFT 2 #define OMAP7XX_PORT_SHIFT 0 diff --git a/include/uapi/linux/serial_reg.h b/include/uapi/linux/serial_reg.h index e9b4cb0cd7ed..1e5ac4e776da 100644 --- a/include/uapi/linux/serial_reg.h +++ b/include/uapi/linux/serial_reg.h @@ -331,6 +331,9 @@ * Extra serial register definitions for the internal UARTs * in TI OMAP processors. */ +#define OMAP1_UART1_BASE 0xfffb0000 +#define OMAP1_UART2_BASE 0xfffb0800 +#define OMAP1_UART3_BASE 0xfffb9800 #define UART_OMAP_MDR1 0x08 /* Mode definition register */ #define UART_OMAP_MDR2 0x09 /* Mode definition register 2 */ #define UART_OMAP_SCR 0x10 /* Supplementary control register */ -- cgit v1.2.3 From 04fd61ab36ec065e194ab5e74ae34a5240d992bb Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 19 May 2015 16:59:03 -0700 Subject: bpf: allow bpf programs to tail-call other bpf programs introduce bpf_tail_call(ctx, &jmp_table, index) helper function which can be used from BPF programs like: int bpf_prog(struct pt_regs *ctx) { ... bpf_tail_call(ctx, &jmp_table, index); ... } that is roughly equivalent to: int bpf_prog(struct pt_regs *ctx) { ... if (jmp_table[index]) return (*jmp_table[index])(ctx); ... } The important detail that it's not a normal call, but a tail call. The kernel stack is precious, so this helper reuses the current stack frame and jumps into another BPF program without adding extra call frame. It's trivially done in interpreter and a bit trickier in JITs. In case of x64 JIT the bigger part of generated assembler prologue is common for all programs, so it is simply skipped while jumping. Other JITs can do similar prologue-skipping optimization or do stack unwind before jumping into the next program. bpf_tail_call() arguments: ctx - context pointer jmp_table - one of BPF_MAP_TYPE_PROG_ARRAY maps used as the jump table index - index in the jump table Since all BPF programs are idenitified by file descriptor, user space need to populate the jmp_table with FDs of other BPF programs. 
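From user space this boils down to two bpf(2) commands: BPF_MAP_CREATE with the new map type, then BPF_MAP_UPDATE_ELEM storing a program fd per index. A minimal raw-syscall sketch, assuming prog_fd was obtained from an earlier BPF_PROG_LOAD (helper names and the lack of error handling are illustrative only):

#include <stdint.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/bpf.h>

static int create_jmp_table(uint32_t max_entries)
{
	union bpf_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.map_type = BPF_MAP_TYPE_PROG_ARRAY;
	attr.key_size = sizeof(uint32_t);
	attr.value_size = sizeof(uint32_t);	/* values are bpf prog fds */
	attr.max_entries = max_entries;

	return syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
}

static int set_jmp_table_entry(int map_fd, uint32_t index, uint32_t prog_fd)
{
	union bpf_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.map_fd = map_fd;
	attr.key = (uint64_t)(unsigned long)&index;
	attr.value = (uint64_t)(unsigned long)&prog_fd;
	attr.flags = BPF_ANY;	/* the only flag prog_array accepts */

	return syscall(__NR_bpf, BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr));
}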
If jmp_table[index] is empty the bpf_tail_call() doesn't jump anywhere and program execution continues as normal. New BPF_MAP_TYPE_PROG_ARRAY map type is introduced so that user space can populate this jmp_table array with FDs of other bpf programs. Programs can share the same jmp_table array or use multiple jmp_tables. The chain of tail calls can form unpredictable dynamic loops therefore tail_call_cnt is used to limit the number of calls and currently is set to 32. Use cases: Acked-by: Daniel Borkmann ========== - simplify complex programs by splitting them into a sequence of small programs - dispatch routine For tracing and future seccomp the program may be triggered on all system calls, but processing of syscall arguments will be different. It's more efficient to implement them as: int syscall_entry(struct seccomp_data *ctx) { bpf_tail_call(ctx, &syscall_jmp_table, ctx->nr /* syscall number */); ... default: process unknown syscall ... } int sys_write_event(struct seccomp_data *ctx) {...} int sys_read_event(struct seccomp_data *ctx) {...} syscall_jmp_table[__NR_write] = sys_write_event; syscall_jmp_table[__NR_read] = sys_read_event; For networking the program may call into different parsers depending on packet format, like: int packet_parser(struct __sk_buff *skb) { ... parse L2, L3 here ... __u8 ipproto = load_byte(skb, ... offsetof(struct iphdr, protocol)); bpf_tail_call(skb, &ipproto_jmp_table, ipproto); ... default: process unknown protocol ... } int parse_tcp(struct __sk_buff *skb) {...} int parse_udp(struct __sk_buff *skb) {...} ipproto_jmp_table[IPPROTO_TCP] = parse_tcp; ipproto_jmp_table[IPPROTO_UDP] = parse_udp; - for TC use case, bpf_tail_call() allows to implement reclassify-like logic - bpf_map_update_elem/delete calls into BPF_MAP_TYPE_PROG_ARRAY jump table are atomic, so user space can build chains of BPF programs on the fly Implementation details: ======================= - high performance of bpf_tail_call() is the goal. It could have been implemented without JIT changes as a wrapper on top of BPF_PROG_RUN() macro, but with two downsides: . all programs would have to pay performance penalty for this feature and tail call itself would be slower, since mandatory stack unwind, return, stack allocate would be done for every tailcall. . tailcall would be limited to programs running preempt_disabled, since generic 'void *ctx' doesn't have room for 'tail_call_cnt' and it would need to be either global per_cpu variable accessed by helper and by wrapper or global variable protected by locks. In this implementation x64 JIT bypasses stack unwind and jumps into the callee program after prologue. - bpf_prog_array_compatible() ensures that prog_type of callee and caller are the same and JITed/non-JITed flag is the same, since calling JITed program from non-JITed is invalid, since stack frames are different. Similarly calling kprobe type program from socket type program is invalid. - jump table is implemented as BPF_MAP_TYPE_PROG_ARRAY to reuse 'map' abstraction, its user space API and all of verifier logic. It's in the existing arraymap.c file, since several functions are shared with regular array map. Signed-off-by: Alexei Starovoitov Signed-off-by: David S. 
Miller --- include/linux/bpf.h | 22 +++++++++ include/linux/filter.h | 2 +- include/uapi/linux/bpf.h | 10 +++++ kernel/bpf/arraymap.c | 113 ++++++++++++++++++++++++++++++++++++++++++++--- kernel/bpf/core.c | 73 +++++++++++++++++++++++++++++- kernel/bpf/syscall.c | 23 +++++++++- kernel/bpf/verifier.c | 17 +++++++ kernel/trace/bpf_trace.c | 2 + net/core/filter.c | 2 + 9 files changed, 255 insertions(+), 9 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index d5cda067115a..8821b9a8689e 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -126,6 +126,27 @@ struct bpf_prog_aux { struct work_struct work; }; +struct bpf_array { + struct bpf_map map; + u32 elem_size; + /* 'ownership' of prog_array is claimed by the first program that + * is going to use this map or by the first program which FD is stored + * in the map to make sure that all callers and callees have the same + * prog_type and JITed flag + */ + enum bpf_prog_type owner_prog_type; + bool owner_jited; + union { + char value[0] __aligned(8); + struct bpf_prog *prog[0] __aligned(8); + }; +}; +#define MAX_TAIL_CALL_CNT 32 + +u64 bpf_tail_call(u64 ctx, u64 r2, u64 index, u64 r4, u64 r5); +void bpf_prog_array_map_clear(struct bpf_map *map); +bool bpf_prog_array_compatible(struct bpf_array *array, const struct bpf_prog *fp); + #ifdef CONFIG_BPF_SYSCALL void bpf_register_prog_type(struct bpf_prog_type_list *tl); void bpf_register_map_type(struct bpf_map_type_list *tl); @@ -160,5 +181,6 @@ extern const struct bpf_func_proto bpf_map_delete_elem_proto; extern const struct bpf_func_proto bpf_get_prandom_u32_proto; extern const struct bpf_func_proto bpf_get_smp_processor_id_proto; +extern const struct bpf_func_proto bpf_tail_call_proto; #endif /* _LINUX_BPF_H */ diff --git a/include/linux/filter.h b/include/linux/filter.h index 200be4a74a33..17724f6ea983 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -378,7 +378,7 @@ static inline void bpf_prog_unlock_ro(struct bpf_prog *fp) int sk_filter(struct sock *sk, struct sk_buff *skb); -void bpf_prog_select_runtime(struct bpf_prog *fp); +int bpf_prog_select_runtime(struct bpf_prog *fp); void bpf_prog_free(struct bpf_prog *fp); struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags); diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index a9ebdf5701e8..f0a9af8b4dae 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -113,6 +113,7 @@ enum bpf_map_type { BPF_MAP_TYPE_UNSPEC, BPF_MAP_TYPE_HASH, BPF_MAP_TYPE_ARRAY, + BPF_MAP_TYPE_PROG_ARRAY, }; enum bpf_prog_type { @@ -210,6 +211,15 @@ enum bpf_func_id { * Return: 0 on success */ BPF_FUNC_l4_csum_replace, + + /** + * bpf_tail_call(ctx, prog_array_map, index) - jump into another BPF program + * @ctx: context pointer passed to next program + * @prog_array_map: pointer to map which type is BPF_MAP_TYPE_PROG_ARRAY + * @index: index inside array that selects specific program to run + * Return: 0 on success + */ + BPF_FUNC_tail_call, __BPF_FUNC_MAX_ID, }; diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index 8a6616583f38..614bcd4c1d74 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -14,12 +14,7 @@ #include #include #include - -struct bpf_array { - struct bpf_map map; - u32 elem_size; - char value[0] __aligned(8); -}; +#include /* Called from syscall */ static struct bpf_map *array_map_alloc(union bpf_attr *attr) @@ -154,3 +149,109 @@ static int __init register_array_map(void) return 0; } 
late_initcall(register_array_map); + +static struct bpf_map *prog_array_map_alloc(union bpf_attr *attr) +{ + /* only bpf_prog file descriptors can be stored in prog_array map */ + if (attr->value_size != sizeof(u32)) + return ERR_PTR(-EINVAL); + return array_map_alloc(attr); +} + +static void prog_array_map_free(struct bpf_map *map) +{ + struct bpf_array *array = container_of(map, struct bpf_array, map); + int i; + + synchronize_rcu(); + + /* make sure it's empty */ + for (i = 0; i < array->map.max_entries; i++) + BUG_ON(array->prog[i] != NULL); + kvfree(array); +} + +static void *prog_array_map_lookup_elem(struct bpf_map *map, void *key) +{ + return NULL; +} + +/* only called from syscall */ +static int prog_array_map_update_elem(struct bpf_map *map, void *key, + void *value, u64 map_flags) +{ + struct bpf_array *array = container_of(map, struct bpf_array, map); + struct bpf_prog *prog, *old_prog; + u32 index = *(u32 *)key, ufd; + + if (map_flags != BPF_ANY) + return -EINVAL; + + if (index >= array->map.max_entries) + return -E2BIG; + + ufd = *(u32 *)value; + prog = bpf_prog_get(ufd); + if (IS_ERR(prog)) + return PTR_ERR(prog); + + if (!bpf_prog_array_compatible(array, prog)) { + bpf_prog_put(prog); + return -EINVAL; + } + + old_prog = xchg(array->prog + index, prog); + if (old_prog) + bpf_prog_put(old_prog); + + return 0; +} + +static int prog_array_map_delete_elem(struct bpf_map *map, void *key) +{ + struct bpf_array *array = container_of(map, struct bpf_array, map); + struct bpf_prog *old_prog; + u32 index = *(u32 *)key; + + if (index >= array->map.max_entries) + return -E2BIG; + + old_prog = xchg(array->prog + index, NULL); + if (old_prog) { + bpf_prog_put(old_prog); + return 0; + } else { + return -ENOENT; + } +} + +/* decrement refcnt of all bpf_progs that are stored in this map */ +void bpf_prog_array_map_clear(struct bpf_map *map) +{ + struct bpf_array *array = container_of(map, struct bpf_array, map); + int i; + + for (i = 0; i < array->map.max_entries; i++) + prog_array_map_delete_elem(map, &i); +} + +static const struct bpf_map_ops prog_array_ops = { + .map_alloc = prog_array_map_alloc, + .map_free = prog_array_map_free, + .map_get_next_key = array_map_get_next_key, + .map_lookup_elem = prog_array_map_lookup_elem, + .map_update_elem = prog_array_map_update_elem, + .map_delete_elem = prog_array_map_delete_elem, +}; + +static struct bpf_map_type_list prog_array_type __read_mostly = { + .ops = &prog_array_ops, + .type = BPF_MAP_TYPE_PROG_ARRAY, +}; + +static int __init register_prog_array_map(void) +{ + bpf_register_map_type(&prog_array_type); + return 0; +} +late_initcall(register_prog_array_map); diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 54f0e7fcd0e2..d44b25cbe460 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -176,6 +176,15 @@ noinline u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) return 0; } +const struct bpf_func_proto bpf_tail_call_proto = { + .func = NULL, + .gpl_only = false, + .ret_type = RET_VOID, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_CONST_MAP_PTR, + .arg3_type = ARG_ANYTHING, +}; + /** * __bpf_prog_run - run eBPF program on a given context * @ctx: is the data we are operating on @@ -244,6 +253,7 @@ static unsigned int __bpf_prog_run(void *ctx, const struct bpf_insn *insn) [BPF_ALU64 | BPF_NEG] = &&ALU64_NEG, /* Call instruction */ [BPF_JMP | BPF_CALL] = &&JMP_CALL, + [BPF_JMP | BPF_CALL | BPF_X] = &&JMP_TAIL_CALL, /* Jumps */ [BPF_JMP | BPF_JA] = &&JMP_JA, [BPF_JMP | BPF_JEQ | BPF_X] = &&JMP_JEQ_X, @@ -286,6 +296,7 @@ 
static unsigned int __bpf_prog_run(void *ctx, const struct bpf_insn *insn) [BPF_LD | BPF_IND | BPF_B] = &&LD_IND_B, [BPF_LD | BPF_IMM | BPF_DW] = &&LD_IMM_DW, }; + u32 tail_call_cnt = 0; void *ptr; int off; @@ -431,6 +442,30 @@ select_insn: BPF_R4, BPF_R5); CONT; + JMP_TAIL_CALL: { + struct bpf_map *map = (struct bpf_map *) (unsigned long) BPF_R2; + struct bpf_array *array = container_of(map, struct bpf_array, map); + struct bpf_prog *prog; + u64 index = BPF_R3; + + if (unlikely(index >= array->map.max_entries)) + goto out; + + if (unlikely(tail_call_cnt > MAX_TAIL_CALL_CNT)) + goto out; + + tail_call_cnt++; + + prog = READ_ONCE(array->prog[index]); + if (unlikely(!prog)) + goto out; + + ARG1 = BPF_R1; + insn = prog->insnsi; + goto select_insn; +out: + CONT; + } /* JMP */ JMP_JA: insn += insn->off; @@ -619,6 +654,40 @@ void __weak bpf_int_jit_compile(struct bpf_prog *prog) { } +bool bpf_prog_array_compatible(struct bpf_array *array, const struct bpf_prog *fp) +{ + if (array->owner_prog_type) { + if (array->owner_prog_type != fp->type) + return false; + if (array->owner_jited != fp->jited) + return false; + } else { + array->owner_prog_type = fp->type; + array->owner_jited = fp->jited; + } + return true; +} + +static int check_tail_call(const struct bpf_prog *fp) +{ + struct bpf_prog_aux *aux = fp->aux; + int i; + + for (i = 0; i < aux->used_map_cnt; i++) { + struct bpf_array *array; + struct bpf_map *map; + + map = aux->used_maps[i]; + if (map->map_type != BPF_MAP_TYPE_PROG_ARRAY) + continue; + array = container_of(map, struct bpf_array, map); + if (!bpf_prog_array_compatible(array, fp)) + return -EINVAL; + } + + return 0; +} + /** * bpf_prog_select_runtime - select execution runtime for BPF program * @fp: bpf_prog populated with internal BPF program @@ -626,7 +695,7 @@ void __weak bpf_int_jit_compile(struct bpf_prog *prog) * try to JIT internal BPF program, if JIT is not available select interpreter * BPF program will be executed via BPF_PROG_RUN() macro */ -void bpf_prog_select_runtime(struct bpf_prog *fp) +int bpf_prog_select_runtime(struct bpf_prog *fp) { fp->bpf_func = (void *) __bpf_prog_run; @@ -634,6 +703,8 @@ void bpf_prog_select_runtime(struct bpf_prog *fp) bpf_int_jit_compile(fp); /* Lock whole bpf_prog as read-only */ bpf_prog_lock_ro(fp); + + return check_tail_call(fp); } EXPORT_SYMBOL_GPL(bpf_prog_select_runtime); diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 3bae6c591914..98a69bd83069 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -68,6 +68,12 @@ static int bpf_map_release(struct inode *inode, struct file *filp) { struct bpf_map *map = filp->private_data; + if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY) + /* prog_array stores refcnt-ed bpf_prog pointers + * release them all when user space closes prog_array_fd + */ + bpf_prog_array_map_clear(map); + bpf_map_put(map); return 0; } @@ -392,6 +398,19 @@ static void fixup_bpf_calls(struct bpf_prog *prog) */ BUG_ON(!prog->aux->ops->get_func_proto); + if (insn->imm == BPF_FUNC_tail_call) { + /* mark bpf_tail_call as different opcode + * to avoid conditional branch in + * interpeter for every normal call + * and to prevent accidental JITing by + * JIT compiler that doesn't support + * bpf_tail_call yet + */ + insn->imm = 0; + insn->code |= BPF_X; + continue; + } + fn = prog->aux->ops->get_func_proto(insn->imm); /* all functions that have prototype and verifier allowed * programs to call them, must be real in-kernel functions @@ -532,7 +551,9 @@ static int bpf_prog_load(union bpf_attr *attr) 
fixup_bpf_calls(prog); /* eBPF program is ready to be JITed */ - bpf_prog_select_runtime(prog); + err = bpf_prog_select_runtime(prog); + if (err < 0) + goto free_used_maps; err = anon_inode_getfd("bpf-prog", &bpf_prog_fops, prog, O_RDWR | O_CLOEXEC); if (err < 0) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 47dcd3aa6e23..cfd9a40b9a5a 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -907,6 +907,23 @@ static int check_call(struct verifier_env *env, int func_id) fn->ret_type, func_id); return -EINVAL; } + + if (map && map->map_type == BPF_MAP_TYPE_PROG_ARRAY && + func_id != BPF_FUNC_tail_call) + /* prog_array map type needs extra care: + * only allow to pass it into bpf_tail_call() for now. + * bpf_map_delete_elem() can be allowed in the future, + * while bpf_map_update_elem() must only be done via syscall + */ + return -EINVAL; + + if (func_id == BPF_FUNC_tail_call && + map->map_type != BPF_MAP_TYPE_PROG_ARRAY) + /* don't allow any other map type to be passed into + * bpf_tail_call() + */ + return -EINVAL; + return 0; } diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 2d56ce501632..646445e41bd4 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -172,6 +172,8 @@ static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func return &bpf_probe_read_proto; case BPF_FUNC_ktime_get_ns: return &bpf_ktime_get_ns_proto; + case BPF_FUNC_tail_call: + return &bpf_tail_call_proto; case BPF_FUNC_trace_printk: /* diff --git a/net/core/filter.c b/net/core/filter.c index 6805717be614..3adcca6f17a4 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1421,6 +1421,8 @@ sk_filter_func_proto(enum bpf_func_id func_id) return &bpf_get_prandom_u32_proto; case BPF_FUNC_get_smp_processor_id: return &bpf_get_smp_processor_id_proto; + case BPF_FUNC_tail_call: + return &bpf_tail_call_proto; default: return NULL; } -- cgit v1.2.3 From 2efd055c53c06b7e89c167c98069bab9afce7e59 Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Wed, 20 May 2015 16:35:41 -0700 Subject: tcp: add tcpi_segs_in and tcpi_segs_out to tcp_info This patch tracks the total number of inbound and outbound segments on a TCP socket. One may use this number to have an idea on connection quality when compared against the retransmissions. RFC4898 named these : tcpEStatsPerfSegsIn and tcpEStatsPerfSegsOut These are a 32bit field each and can be fetched both from TCP_INFO getsockopt() if one has a handle on a TCP socket, or from inet_diag netlink facility (iproute2/ss patch will follow) Note that tp->segs_out was placed near tp->snd_nxt for good data locality and minimal performance impact, while tp->segs_in was placed near tp->bytes_received for the same reason. Join work with Eric Dumazet. Note that received SYN are accounted on the listener, but sent SYNACK are not accounted. Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/tcp.h | 7 ++++++- include/uapi/linux/tcp.h | 4 +++- net/ipv4/tcp.c | 2 ++ net/ipv4/tcp_ipv4.c | 1 + net/ipv4/tcp_minisocks.c | 1 + net/ipv4/tcp_output.c | 1 + net/ipv6/tcp_ipv6.c | 1 + 7 files changed, 15 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index e6fb5df22db1..f0212026c77f 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -149,11 +149,16 @@ struct tcp_sock { * sum(delta(rcv_nxt)), or how many bytes * were acked. 
*/ + u32 segs_in; /* RFC4898 tcpEStatsPerfSegsIn + * total number of segments in. + */ u32 rcv_nxt; /* What we want to receive next */ u32 copied_seq; /* Head of yet unread data */ u32 rcv_wup; /* rcv_nxt on last window update sent */ u32 snd_nxt; /* Next sequence we send */ - + u32 segs_out; /* RFC4898 tcpEStatsPerfSegsOut + * The total number of segments sent. + */ u64 bytes_acked; /* RFC4898 tcpEStatsAppHCThruOctetsAcked * sum(delta(snd_una)), or how many bytes * were acked. diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h index 51ebedba577f..65a77b071e22 100644 --- a/include/uapi/linux/tcp.h +++ b/include/uapi/linux/tcp.h @@ -192,8 +192,10 @@ struct tcp_info { __u64 tcpi_pacing_rate; __u64 tcpi_max_pacing_rate; - __u64 tcpi_bytes_acked; /* RFC4898 tcpEStatsAppHCThruOctetsAcked */ + __u64 tcpi_bytes_acked; /* RFC4898 tcpEStatsAppHCThruOctetsAcked */ __u64 tcpi_bytes_received; /* RFC4898 tcpEStatsAppHCThruOctetsReceived */ + __u32 tcpi_segs_out; /* RFC4898 tcpEStatsPerfSegsOut */ + __u32 tcpi_segs_in; /* RFC4898 tcpEStatsPerfSegsIn */ }; /* for TCP_MD5SIG socket option */ diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 0a3f9a00565b..7f3e721b9e69 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2695,6 +2695,8 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) spin_lock_bh(&sk->sk_lock.slock); info->tcpi_bytes_acked = tp->bytes_acked; info->tcpi_bytes_received = tp->bytes_received; + info->tcpi_segs_out = tp->segs_out; + info->tcpi_segs_in = tp->segs_in; spin_unlock_bh(&sk->sk_lock.slock); } EXPORT_SYMBOL_GPL(tcp_get_info); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 0cc4b5a630cd..feb875769b8d 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1626,6 +1626,7 @@ process: skb->dev = NULL; bh_lock_sock_nested(sk); + tcp_sk(sk)->segs_in += max_t(u16, 1, skb_shinfo(skb)->gso_segs); ret = 0; if (!sock_owned_by_user(sk)) { if (!tcp_prequeue(sk, skb)) diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index ebe2ab2596ed..b62d15c86946 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -448,6 +448,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, newtp->rcv_wup = newtp->copied_seq = newtp->rcv_nxt = treq->rcv_isn + 1; + newtp->segs_in = 0; newtp->snd_sml = newtp->snd_una = newtp->snd_nxt = newtp->snd_up = treq->snt_isn + 1; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 3aebe0157dfa..534e5fdb04c1 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1027,6 +1027,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, TCP_ADD_STATS(sock_net(sk), TCP_MIB_OUTSEGS, tcp_skb_pcount(skb)); + tp->segs_out += tcp_skb_pcount(skb); /* OK, its time to fill skb_shinfo(skb)->gso_segs */ skb_shinfo(skb)->gso_segs = tcp_skb_pcount(skb); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index b6575d665568..beac6bf840b9 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1421,6 +1421,7 @@ process: skb->dev = NULL; bh_lock_sock_nested(sk); + tcp_sk(sk)->segs_in += max_t(u16, 1, skb_shinfo(skb)->gso_segs); ret = 0; if (!sock_owned_by_user(sk)) { if (!tcp_prequeue(sk, skb)) -- cgit v1.2.3 From bd5850d39f10f9d216bff69bcbf5938680b862ae Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 21 May 2015 02:26:24 +0200 Subject: net: sched: pkt_cls: remove unused macros from uapi Jamal points out that this header also contains kernel internal magic that cannot be used from userspace for anything meaningful. 
Lets remove what the kernel doesn't use anymore and wrap remainder with __KERNEL__. Suggested-by: Jamal Hadi Salim Suggested-by: Alexei Starovoitov Signed-off-by: Florian Westphal Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/uapi/linux/pkt_cls.h | 32 +++++--------------------------- 1 file changed, 5 insertions(+), 27 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index 39fb53d67b11..4f0d1bc3647d 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -4,6 +4,7 @@ #include #include +#ifdef __KERNEL__ /* I think i could have done better macros ; for now this is stolen from * some arch/mips code - jhs */ @@ -35,23 +36,6 @@ bits 9,10,11: redirect counter - redirect TTL. Loop avoidance * * */ -#ifndef __KERNEL__ -/* backwards compat for userspace only */ -#define TC_MUNGED _TC_MAKEMASK1(0) -#define SET_TC_MUNGED(v) ( TC_MUNGED | (v & ~TC_MUNGED)) -#define CLR_TC_MUNGED(v) ( v & ~TC_MUNGED) - -#define TC_OK2MUNGE _TC_MAKEMASK1(1) -#define SET_TC_OK2MUNGE(v) ( TC_OK2MUNGE | (v & ~TC_OK2MUNGE)) -#define CLR_TC_OK2MUNGE(v) ( v & ~TC_OK2MUNGE) - -#define S_TC_VERD _TC_MAKE32(2) -#define M_TC_VERD _TC_MAKEMASK(4,S_TC_VERD) -#define G_TC_VERD(x) _TC_GETVALUE(x,S_TC_VERD,M_TC_VERD) -#define V_TC_VERD(x) _TC_MAKEVALUE(x,S_TC_VERD) -#define SET_TC_VERD(v,n) ((V_TC_VERD(n)) | (v & ~M_TC_VERD)) -#endif - #define S_TC_FROM _TC_MAKE32(6) #define M_TC_FROM _TC_MAKEMASK(2,S_TC_FROM) #define G_TC_FROM(x) _TC_GETVALUE(x,S_TC_FROM,M_TC_FROM) @@ -65,20 +49,16 @@ bits 9,10,11: redirect counter - redirect TTL. Loop avoidance #define SET_TC_NCLS(v) ( TC_NCLS | (v & ~TC_NCLS)) #define CLR_TC_NCLS(v) ( v & ~TC_NCLS) -#ifndef __KERNEL__ -#define S_TC_RTTL _TC_MAKE32(9) -#define M_TC_RTTL _TC_MAKEMASK(3,S_TC_RTTL) -#define G_TC_RTTL(x) _TC_GETVALUE(x,S_TC_RTTL,M_TC_RTTL) -#define V_TC_RTTL(x) _TC_MAKEVALUE(x,S_TC_RTTL) -#define SET_TC_RTTL(v,n) ((V_TC_RTTL(n)) | (v & ~M_TC_RTTL)) -#endif - #define S_TC_AT _TC_MAKE32(12) #define M_TC_AT _TC_MAKEMASK(2,S_TC_AT) #define G_TC_AT(x) _TC_GETVALUE(x,S_TC_AT,M_TC_AT) #define V_TC_AT(x) _TC_MAKEVALUE(x,S_TC_AT) #define SET_TC_AT(v,n) ((V_TC_AT(n)) | (v & ~M_TC_AT)) +#define MAX_REC_LOOP 4 +#define MAX_RED_LOOP 4 +#endif + /* Action attributes */ enum { TCA_ACT_UNSPEC, @@ -98,8 +78,6 @@ enum { #define TCA_ACT_NOUNBIND 0 #define TCA_ACT_REPLACE 1 #define TCA_ACT_NOREPLACE 0 -#define MAX_REC_LOOP 4 -#define MAX_RED_LOOP 4 #define TC_ACT_UNSPEC (-1) #define TC_ACT_OK 0 -- cgit v1.2.3 From 1ce87f21edf6a071a7cc6bc77d628d7c7650d0d8 Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Fri, 22 May 2015 18:17:38 +0200 Subject: iio: Add I/Q modifiers I/Q modifiers can be used to denote signals which are represented by a in-phase and a quadrature component. The ABI documentation describes the I and Q modifiers for current and voltage channels for now as those will be the most likely users. 
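A driver wishing to expose such a pair declares two iio_chan_spec entries that differ only in channel2. A hypothetical fragment (channel type, resolution and scan layout are assumptions for illustration; only the IIO_MOD_I/IIO_MOD_Q values come from this patch):

#include <linux/bitops.h>
#include <linux/iio/iio.h>

static const struct iio_chan_spec mydev_channels[] = {
	{	/* exposed as in_voltage0_i_raw, in_voltage0_i_scale */
		.type = IIO_VOLTAGE,
		.indexed = 1,
		.channel = 0,
		.modified = 1,
		.channel2 = IIO_MOD_I,	/* in-phase component */
		.info_mask_separate = BIT(IIO_CHAN_INFO_RAW) |
				      BIT(IIO_CHAN_INFO_SCALE),
		.scan_index = 0,
		.scan_type = { .sign = 's', .realbits = 16,
			       .storagebits = 16, .endianness = IIO_LE },
	},
	{	/* exposed as in_voltage0_q_raw, in_voltage0_q_scale */
		.type = IIO_VOLTAGE,
		.indexed = 1,
		.channel = 0,
		.modified = 1,
		.channel2 = IIO_MOD_Q,	/* quadrature component */
		.info_mask_separate = BIT(IIO_CHAN_INFO_RAW) |
				      BIT(IIO_CHAN_INFO_SCALE),
		.scan_index = 1,
		.scan_type = { .sign = 's', .realbits = 16,
			       .storagebits = 16, .endianness = IIO_LE },
	},
};

Both entries refer to the same measured signal; user space pairs them by the shared index and the i/q modifier in the sysfs name, as described in the ABI documentation above.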
Signed-off-by: Lars-Peter Clausen Signed-off-by: Jonathan Cameron --- Documentation/ABI/testing/sysfs-bus-iio | 46 +++++++++++++++++++++++++++++++++ drivers/iio/industrialio-core.c | 2 ++ include/uapi/linux/iio/types.h | 2 ++ 3 files changed, 50 insertions(+) (limited to 'include/uapi/linux') diff --git a/Documentation/ABI/testing/sysfs-bus-iio b/Documentation/ABI/testing/sysfs-bus-iio index f66262c64e2f..bbed111c31b4 100644 --- a/Documentation/ABI/testing/sysfs-bus-iio +++ b/Documentation/ABI/testing/sysfs-bus-iio @@ -71,6 +71,8 @@ Description: What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_raw What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_supply_raw +What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_i_raw +What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_q_raw KernelVersion: 2.6.35 Contact: linux-iio@vger.kernel.org Description: @@ -81,6 +83,11 @@ Description: unique to allow association with event codes. Units after application of scale and offset are millivolts. + Channels with 'i' and 'q' modifiers always exist in pairs and both + channels refer to the same signal. The 'i' channel contains the in-phase + component of the signal while the 'q' channel contains the quadrature + component. + What: /sys/bus/iio/devices/iio:deviceX/in_voltageY-voltageZ_raw KernelVersion: 2.6.35 Contact: linux-iio@vger.kernel.org @@ -246,8 +253,16 @@ What: /sys/bus/iio/devices/iio:deviceX/in_accel_y_offset What: /sys/bus/iio/devices/iio:deviceX/in_accel_z_offset What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_offset What: /sys/bus/iio/devices/iio:deviceX/in_voltage_offset +What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_i_offset +What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_q_offset +What: /sys/bus/iio/devices/iio:deviceX/in_voltage_q_offset +What: /sys/bus/iio/devices/iio:deviceX/in_voltage_i_offset What: /sys/bus/iio/devices/iio:deviceX/in_currentY_offset What: /sys/bus/iio/devices/iio:deviceX/in_current_offset +What: /sys/bus/iio/devices/iio:deviceX/in_currentY_i_offset +What: /sys/bus/iio/devices/iio:deviceX/in_currentY_q_offset +What: /sys/bus/iio/devices/iio:deviceX/in_current_q_offset +What: /sys/bus/iio/devices/iio:deviceX/in_current_i_offset What: /sys/bus/iio/devices/iio:deviceX/in_tempY_offset What: /sys/bus/iio/devices/iio:deviceX/in_temp_offset What: /sys/bus/iio/devices/iio:deviceX/in_pressureY_offset @@ -273,14 +288,22 @@ Description: to the _raw output. 
What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_scale +What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_i_scale +What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_q_scale What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_supply_scale What: /sys/bus/iio/devices/iio:deviceX/in_voltage_scale +What: /sys/bus/iio/devices/iio:deviceX/in_voltage_i_scale +What: /sys/bus/iio/devices/iio:deviceX/in_voltage_q_scale What: /sys/bus/iio/devices/iio:deviceX/in_voltage-voltage_scale What: /sys/bus/iio/devices/iio:deviceX/out_voltageY_scale What: /sys/bus/iio/devices/iio:deviceX/out_altvoltageY_scale What: /sys/bus/iio/devices/iio:deviceX/in_currentY_scale What: /sys/bus/iio/devices/iio:deviceX/in_currentY_supply_scale What: /sys/bus/iio/devices/iio:deviceX/in_current_scale +What: /sys/bus/iio/devices/iio:deviceX/in_currentY_i_scale +What: /sys/bus/iio/devices/iio:deviceX/in_currentY_q_scale +What: /sys/bus/iio/devices/iio:deviceX/in_current_i_scale +What: /sys/bus/iio/devices/iio:deviceX/in_current_q_scale What: /sys/bus/iio/devices/iio:deviceX/in_accel_scale What: /sys/bus/iio/devices/iio:deviceX/in_accel_peak_scale What: /sys/bus/iio/devices/iio:deviceX/in_anglvel_scale @@ -328,6 +351,10 @@ Description: What /sys/bus/iio/devices/iio:deviceX/in_voltageY_calibscale What /sys/bus/iio/devices/iio:deviceX/in_voltageY_supply_calibscale +What /sys/bus/iio/devices/iio:deviceX/in_voltageY_i_calibscale +What /sys/bus/iio/devices/iio:deviceX/in_voltageY_q_calibscale +What /sys/bus/iio/devices/iio:deviceX/in_voltage_i_calibscale +What /sys/bus/iio/devices/iio:deviceX/in_voltage_q_calibscale What /sys/bus/iio/devices/iio:deviceX/in_voltage_calibscale What /sys/bus/iio/devices/iio:deviceX/in_accel_x_calibscale What /sys/bus/iio/devices/iio:deviceX/in_accel_y_calibscale @@ -1046,6 +1073,10 @@ What: /sys/.../iio:deviceX/scan_elements/in_timestamp_en What: /sys/.../iio:deviceX/scan_elements/in_voltageY_supply_en What: /sys/.../iio:deviceX/scan_elements/in_voltageY_en What: /sys/.../iio:deviceX/scan_elements/in_voltageY-voltageZ_en +What: /sys/.../iio:deviceX/scan_elements/in_voltageY_i_en +What: /sys/.../iio:deviceX/scan_elements/in_voltageY_q_en +What: /sys/.../iio:deviceX/scan_elements/in_voltage_i_en +What: /sys/.../iio:deviceX/scan_elements/in_voltage_q_en What: /sys/.../iio:deviceX/scan_elements/in_incli_x_en What: /sys/.../iio:deviceX/scan_elements/in_incli_y_en What: /sys/.../iio:deviceX/scan_elements/in_pressureY_en @@ -1064,6 +1095,10 @@ What: /sys/.../iio:deviceX/scan_elements/in_incli_type What: /sys/.../iio:deviceX/scan_elements/in_voltageY_type What: /sys/.../iio:deviceX/scan_elements/in_voltage_type What: /sys/.../iio:deviceX/scan_elements/in_voltageY_supply_type +What: /sys/.../iio:deviceX/scan_elements/in_voltageY_i_type +What: /sys/.../iio:deviceX/scan_elements/in_voltageY_q_type +What: /sys/.../iio:deviceX/scan_elements/in_voltage_i_type +What: /sys/.../iio:deviceX/scan_elements/in_voltage_q_type What: /sys/.../iio:deviceX/scan_elements/in_timestamp_type What: /sys/.../iio:deviceX/scan_elements/in_pressureY_type What: /sys/.../iio:deviceX/scan_elements/in_pressure_type @@ -1101,6 +1136,10 @@ Description: What: /sys/.../iio:deviceX/scan_elements/in_voltageY_index What: /sys/.../iio:deviceX/scan_elements/in_voltageY_supply_index +What: /sys/.../iio:deviceX/scan_elements/in_voltageY_i_index +What: /sys/.../iio:deviceX/scan_elements/in_voltageY_q_index +What: /sys/.../iio:deviceX/scan_elements/in_voltage_i_index +What: /sys/.../iio:deviceX/scan_elements/in_voltage_q_index What: 
/sys/.../iio:deviceX/scan_elements/in_accel_x_index What: /sys/.../iio:deviceX/scan_elements/in_accel_y_index What: /sys/.../iio:deviceX/scan_elements/in_accel_z_index @@ -1260,6 +1299,8 @@ Description: or without compensation from tilt sensors. What: /sys/bus/iio/devices/iio:deviceX/in_currentX_raw +What: /sys/bus/iio/devices/iio:deviceX/in_currentX_i_raw +What: /sys/bus/iio/devices/iio:deviceX/in_currentX_q_raw KernelVersion: 3.18 Contact: linux-iio@vger.kernel.org Description: @@ -1268,6 +1309,11 @@ Description: present, output should be considered as processed with the unit in milliamps. + Channels with 'i' and 'q' modifiers always exist in pairs and both + channels refer to the same signal. The 'i' channel contains the in-phase + component of the signal while the 'q' channel contains the quadrature + component. + What: /sys/.../iio:deviceX/in_energy_en What: /sys/.../iio:deviceX/in_distance_en What: /sys/.../iio:deviceX/in_velocity_sqrt(x^2+y^2+z^2)_en diff --git a/drivers/iio/industrialio-core.c b/drivers/iio/industrialio-core.c index 9688a88b6198..3524b0de8721 100644 --- a/drivers/iio/industrialio-core.c +++ b/drivers/iio/industrialio-core.c @@ -101,6 +101,8 @@ static const char * const iio_modifier_names[] = { [IIO_MOD_WALKING] = "walking", [IIO_MOD_STILL] = "still", [IIO_MOD_ROOT_SUM_SQUARED_X_Y_Z] = "sqrt(x^2+y^2+z^2)", + [IIO_MOD_I] = "i", + [IIO_MOD_Q] = "q", }; /* relies on pairs of these shared then separate */ diff --git a/include/uapi/linux/iio/types.h b/include/uapi/linux/iio/types.h index 5c4601935005..2f8b11722204 100644 --- a/include/uapi/linux/iio/types.h +++ b/include/uapi/linux/iio/types.h @@ -70,6 +70,8 @@ enum iio_modifier { IIO_MOD_WALKING, IIO_MOD_STILL, IIO_MOD_ROOT_SUM_SQUARED_X_Y_Z, + IIO_MOD_I, + IIO_MOD_Q, }; enum iio_event_type { -- cgit v1.2.3 From 7df20f2d893db42eaa1ea1e30a2573c971ec9238 Mon Sep 17 00:00:00 2001 From: Sudeep Dutt Date: Wed, 29 Apr 2015 05:32:28 -0700 Subject: misc: mic: SCIF header file and IOCTL interface This patch introduces the SCIF documentation in the header file and describes the IOCTL interface for user mode. mic_overview.txt is updated with documentation on SCIF and a new document describing SCIF in more details is available in scif_overview.txt. Reviewed-by: Nikhil Rao Reviewed-by: Ashutosh Dixit Signed-off-by: Sudeep Dutt Signed-off-by: Greg Kroah-Hartman --- Documentation/mic/mic_overview.txt | 28 +- Documentation/mic/scif_overview.txt | 98 ++++ include/linux/scif.h | 993 ++++++++++++++++++++++++++++++++++++ include/uapi/linux/Kbuild | 1 + include/uapi/linux/scif_ioctl.h | 130 +++++ 5 files changed, 1238 insertions(+), 12 deletions(-) create mode 100644 Documentation/mic/scif_overview.txt create mode 100644 include/linux/scif.h create mode 100644 include/uapi/linux/scif_ioctl.h (limited to 'include/uapi/linux') diff --git a/Documentation/mic/mic_overview.txt b/Documentation/mic/mic_overview.txt index 77c541802ad9..1a2f2c8ec59e 100644 --- a/Documentation/mic/mic_overview.txt +++ b/Documentation/mic/mic_overview.txt @@ -24,6 +24,10 @@ a virtual bus called mic bus is created and virtual dma devices are created on it by the host/card drivers. On host the channels are private and used only by the host driver to transfer data for the virtio devices. +The Symmetric Communication Interface (SCIF (pronounced as skiff)) is a +low level communications API across PCIe currently implemented for MIC. +More details are available at scif_overview.txt. + Here is a block diagram of the various components described above. 
The virtio backends are situated on the host rather than the card given better single threaded performance for the host compared to MIC, the ability of @@ -47,18 +51,18 @@ the fact that the virtio block storage backend can only be on the host. | | | Virtio over PCIe IOCTLs | | | +--------------------------+ +-----------+ | | | +-----------+ -| MIC DMA | | | | | MIC DMA | -| Driver | | | | | Driver | -+-----------+ | | | +-----------+ - | | | | | -+---------------+ | | | +----------------+ -|MIC virtual Bus| | | | |MIC virtual Bus | -+---------------+ | | | +----------------+ - | | | | | - | +--------------+ | +---------------+ | - | |Intel MIC | | |Intel MIC | | - +---|Card Driver | | |Host Driver | | - +--------------+ | +---------------+-----+ +| MIC DMA | | +----------+ | +-----------+ | | MIC DMA | +| Driver | | | SCIF | | | SCIF | | | Driver | ++-----------+ | +----------+ | +-----------+ | +-----------+ + | | | | | | | ++---------------+ | +-----+-----+ | +-----+-----+ | +---------------+ +|MIC virtual Bus| | |SCIF HW Bus| | |SCIF HW BUS| | |MIC virtual Bus| ++---------------+ | +-----------+ | +-----+-----+ | +---------------+ + | | | | | | | + | +--------------+ | | | +---------------+ | + | |Intel MIC | | | | |Intel MIC | | + +---|Card Driver +----+ | | |Host Driver | | + +--------------+ | +----+---------------+-----+ | | | +-------------------------------------------------------------+ | | diff --git a/Documentation/mic/scif_overview.txt b/Documentation/mic/scif_overview.txt new file mode 100644 index 000000000000..0a280d986731 --- /dev/null +++ b/Documentation/mic/scif_overview.txt @@ -0,0 +1,98 @@ +The Symmetric Communication Interface (SCIF (pronounced as skiff)) is a low +level communications API across PCIe currently implemented for MIC. Currently +SCIF provides inter-node communication within a single host platform, where a +node is a MIC Coprocessor or Xeon based host. SCIF abstracts the details of +communicating over the PCIe bus while providing an API that is symmetric +across all the nodes in the PCIe network. An important design objective for SCIF +is to deliver the maximum possible performance given the communication +abilities of the hardware. SCIF has been used to implement an offload compiler +runtime and OFED support for MPI implementations for MIC coprocessors. + +==== SCIF API Components ==== +The SCIF API has the following parts: +1. Connection establishment using a client server model +2. Byte stream messaging intended for short messages +3. Node enumeration to determine online nodes +4. Poll semantics for detection of incoming connections and messages +5. Memory registration to pin down pages +6. Remote memory mapping for low latency CPU accesses via mmap +7. Remote DMA (RDMA) for high bandwidth DMA transfers +8. Fence APIs for RDMA synchronization + +SCIF exposes the notion of a connection which can be used by peer processes on +nodes in a SCIF PCIe "network" to share memory "windows" and to communicate. A +process in a SCIF node initiates a SCIF connection to a peer process on a +different node via a SCIF "endpoint". SCIF endpoints support messaging APIs +which are similar to connection oriented socket APIs. Connected SCIF endpoints +can also register local memory which is followed by data transfer using either +DMA, CPU copies or remote memory mapping via mmap. SCIF supports both user and +kernel mode clients which are functionally equivalent. 
+ +==== SCIF Performance for MIC ==== +DMA bandwidth comparison between the TCP (over ethernet over PCIe) stack versus +SCIF shows the performance advantages of SCIF for HPC applications and runtimes. + + Comparison of TCP and SCIF based BW + + Throughput (GB/sec) + 8 + PCIe Bandwidth ****** + + TCP ###### + 7 + ************************************** SCIF %%%%%% + | %%%%%%%%%%%%%%%%%%% + 6 + %%%% + | %% + | %%% + 5 + %% + | %% + 4 + %% + | %% + 3 + %% + | % + 2 + %% + | %% + | % + 1 + + + ###################################### + 0 +++---+++--+--+-+--+--+-++-+--+-++-+--+-++-+- + 1 10 100 1000 10000 100000 + Transfer Size (KBytes) + +SCIF allows memory sharing via mmap(..) between processes on different PCIe +nodes and thus provides bare-metal PCIe latency. The round trip SCIF mmap +latency from the host to an x100 MIC for an 8 byte message is 0.44 usecs. + +SCIF has a user space library which is a thin IOCTL wrapper providing a user +space API similar to the kernel API in scif.h. The SCIF user space library +is distributed @ https://software.intel.com/en-us/mic-developer + +Here is some pseudo code for an example of how two applications on two PCIe +nodes would typically use the SCIF API: + +Process A (on node A) Process B (on node B) + +/* get online node information */ +scif_get_node_ids(..) scif_get_node_ids(..) +scif_open(..) scif_open(..) +scif_bind(..) scif_bind(..) +scif_listen(..) +scif_accept(..) scif_connect(..) +/* SCIF connection established */ + +/* Send and receive short messages */ +scif_send(..)/scif_recv(..) scif_send(..)/scif_recv(..) + +/* Register memory */ +scif_register(..) scif_register(..) + +/* RDMA */ +scif_readfrom(..)/scif_writeto(..) scif_readfrom(..)/scif_writeto(..) + +/* Fence DMAs */ +scif_fence_signal(..) scif_fence_signal(..) + +mmap(..) mmap(..) + +/* Access remote registered memory */ + +/* Close the endpoints */ +scif_close(..) scif_close(..) diff --git a/include/linux/scif.h b/include/linux/scif.h new file mode 100644 index 000000000000..44f4f3898bbe --- /dev/null +++ b/include/linux/scif.h @@ -0,0 +1,993 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright(c) 2014 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Copyright(c) 2014 Intel Corporation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Intel SCIF driver. + * + */ +#ifndef __SCIF_H__ +#define __SCIF_H__ + +#include +#include +#include + +#define SCIF_ACCEPT_SYNC 1 +#define SCIF_SEND_BLOCK 1 +#define SCIF_RECV_BLOCK 1 + +enum { + SCIF_PROT_READ = (1 << 0), + SCIF_PROT_WRITE = (1 << 1) +}; + +enum { + SCIF_MAP_FIXED = 0x10, + SCIF_MAP_KERNEL = 0x20, +}; + +enum { + SCIF_FENCE_INIT_SELF = (1 << 0), + SCIF_FENCE_INIT_PEER = (1 << 1), + SCIF_SIGNAL_LOCAL = (1 << 4), + SCIF_SIGNAL_REMOTE = (1 << 5) +}; + +enum { + SCIF_RMA_USECPU = (1 << 0), + SCIF_RMA_USECACHE = (1 << 1), + SCIF_RMA_SYNC = (1 << 2), + SCIF_RMA_ORDERED = (1 << 3) +}; + +/* End of SCIF Admin Reserved Ports */ +#define SCIF_ADMIN_PORT_END 1024 + +/* End of SCIF Reserved Ports */ +#define SCIF_PORT_RSVD 1088 + +typedef struct scif_endpt *scif_epd_t; + +#define SCIF_OPEN_FAILED ((scif_epd_t)-1) +#define SCIF_REGISTER_FAILED ((off_t)-1) +#define SCIF_MMAP_FAILED ((void *)-1) + +/** + * scif_open() - Create an endpoint + * + * Return: + * Upon successful completion, scif_open() returns an endpoint descriptor to + * be used in subsequent SCIF functions calls to refer to that endpoint; + * otherwise in user mode SCIF_OPEN_FAILED (that is ((scif_epd_t)-1)) is + * returned and errno is set to indicate the error; in kernel mode a NULL + * scif_epd_t is returned. + * + * Errors: + * ENOMEM - Insufficient kernel memory was available + */ +scif_epd_t scif_open(void); + +/** + * scif_bind() - Bind an endpoint to a port + * @epd: endpoint descriptor + * @pn: port number + * + * scif_bind() binds endpoint epd to port pn, where pn is a port number on the + * local node. If pn is zero, a port number greater than or equal to + * SCIF_PORT_RSVD is assigned and returned. Each endpoint may be bound to + * exactly one local port. Ports less than 1024 when requested can only be bound + * by system (or root) processes or by processes executed by privileged users. + * + * Return: + * Upon successful completion, scif_bind() returns the port number to which epd + * is bound; otherwise in user mode -1 is returned and errno is set to + * indicate the error; in kernel mode the negative of one of the following + * errors is returned. 
+ * + * Errors: + * EBADF, ENOTTY - epd is not a valid endpoint descriptor + * EINVAL - the endpoint or the port is already bound + * EISCONN - The endpoint is already connected + * ENOSPC - No port number available for assignment + * EACCES - The port requested is protected and the user is not the superuser + */ +int scif_bind(scif_epd_t epd, u16 pn); + +/** + * scif_listen() - Listen for connections on an endpoint + * @epd: endpoint descriptor + * @backlog: maximum pending connection requests + * + * scif_listen() marks the endpoint epd as a listening endpoint - that is, as + * an endpoint that will be used to accept incoming connection requests. Once + * so marked, the endpoint is said to be in the listening state and may not be + * used as the endpoint of a connection. + * + * The endpoint, epd, must have been bound to a port. + * + * The backlog argument defines the maximum length to which the queue of + * pending connections for epd may grow. If a connection request arrives when + * the queue is full, the client may receive an error with an indication that + * the connection was refused. + * + * Return: + * Upon successful completion, scif_listen() returns 0; otherwise in user mode + * -1 is returned and errno is set to indicate the error; in kernel mode the + * negative of one of the following errors is returned. + * + * Errors: + * EBADF, ENOTTY - epd is not a valid endpoint descriptor + * EINVAL - the endpoint is not bound to a port + * EISCONN - The endpoint is already connected or listening + */ +int scif_listen(scif_epd_t epd, int backlog); + +/** + * scif_connect() - Initiate a connection on a port + * @epd: endpoint descriptor + * @dst: global id of port to which to connect + * + * The scif_connect() function requests the connection of endpoint epd to remote + * port dst. If the connection is successful, a peer endpoint, bound to dst, is + * created on node dst.node. On successful return, the connection is complete. + * + * If the endpoint epd has not already been bound to a port, scif_connect() + * will bind it to an unused local port. + * + * A connection is terminated when an endpoint of the connection is closed, + * either explicitly by scif_close(), or when a process that owns one of the + * endpoints of the connection is terminated. + * + * In user space, scif_connect() supports an asynchronous connection mode + * if the application has set the O_NONBLOCK flag on the endpoint via the + * fcntl() system call. Setting this flag will result in the calling process + * not to wait during scif_connect(). + * + * Return: + * Upon successful completion, scif_connect() returns the port ID to which the + * endpoint, epd, is bound; otherwise in user mode -1 is returned and errno is + * set to indicate the error; in kernel mode the negative of one of the + * following errors is returned. 
+ * + * Errors: + * EBADF, ENOTTY - epd is not a valid endpoint descriptor + * ECONNREFUSED - The destination was not listening for connections or refused + * the connection request + * EINVAL - dst.port is not a valid port ID + * EISCONN - The endpoint is already connected + * ENOMEM - No buffer space is available + * ENODEV - The destination node does not exist, or the node is lost or existed, + * but is not currently in the network since it may have crashed + * ENOSPC - No port number available for assignment + * EOPNOTSUPP - The endpoint is listening and cannot be connected + */ +int scif_connect(scif_epd_t epd, struct scif_port_id *dst); + +/** + * scif_accept() - Accept a connection on an endpoint + * @epd: endpoint descriptor + * @peer: global id of port to which connected + * @newepd: new connected endpoint descriptor + * @flags: flags + * + * The scif_accept() call extracts the first connection request from the queue + * of pending connections for the port on which epd is listening. scif_accept() + * creates a new endpoint, bound to the same port as epd, and allocates a new + * SCIF endpoint descriptor, returned in newepd, for the endpoint. The new + * endpoint is connected to the endpoint through which the connection was + * requested. epd is unaffected by this call, and remains in the listening + * state. + * + * On successful return, peer holds the global port identifier (node id and + * local port number) of the port which requested the connection. + * + * A connection is terminated when an endpoint of the connection is closed, + * either explicitly by scif_close(), or when a process that owns one of the + * endpoints of the connection is terminated. + * + * The number of connections that can (subsequently) be accepted on epd is only + * limited by system resources (memory). + * + * The flags argument is formed by OR'ing together zero or more of the + * following values. + * SCIF_ACCEPT_SYNC - block until a connection request is presented. If + * SCIF_ACCEPT_SYNC is not in flags, and no pending + * connections are present on the queue, scif_accept() + * fails with an EAGAIN error + * + * In user mode, the select() and poll() functions can be used to determine + * when there is a connection request. In kernel mode, the scif_poll() + * function may be used for this purpose. A readable event will be delivered + * when a connection is requested. + * + * Return: + * Upon successful completion, scif_accept() returns 0; otherwise in user mode + * -1 is returned and errno is set to indicate the error; in kernel mode the + * negative of one of the following errors is returned. + * + * Errors: + * EAGAIN - SCIF_ACCEPT_SYNC is not set and no connections are present to be + * accepted or SCIF_ACCEPT_SYNC is not set and remote node failed to complete + * its connection request + * EBADF, ENOTTY - epd is not a valid endpoint descriptor + * EINTR - Interrupted function + * EINVAL - epd is not a listening endpoint, or flags is invalid, or peer is + * NULL, or newepd is NULL + * ENODEV - The requesting node is lost or existed, but is not currently in the + * network since it may have crashed + * ENOMEM - Not enough space + * ENOENT - Secondary part of epd registration failed + */ +int scif_accept(scif_epd_t epd, struct scif_port_id *peer, scif_epd_t + *newepd, int flags); + +/** + * scif_close() - Close an endpoint + * @epd: endpoint descriptor + * + * scif_close() closes an endpoint and performs necessary teardown of + * facilities associated with that endpoint. 
+ * + * If epd is a listening endpoint then it will no longer accept connection + * requests on the port to which it is bound. Any pending connection requests + * are rejected. + * + * If epd is a connected endpoint, then its peer endpoint is also closed. RMAs + * which are in-process through epd or its peer endpoint will complete before + * scif_close() returns. Registered windows of the local and peer endpoints are + * released as if scif_unregister() was called against each window. + * + * Closing a SCIF endpoint does not affect local registered memory mapped by + * a SCIF endpoint on a remote node. The local memory remains mapped by the peer + * SCIF endpoint explicitly removed by calling munmap(..) by the peer. + * + * If the peer endpoint's receive queue is not empty at the time that epd is + * closed, then the peer endpoint can be passed as the endpoint parameter to + * scif_recv() until the receive queue is empty. + * + * epd is freed and may no longer be accessed. + * + * Return: + * Upon successful completion, scif_close() returns 0; otherwise in user mode + * -1 is returned and errno is set to indicate the error; in kernel mode the + * negative of one of the following errors is returned. + * + * Errors: + * EBADF, ENOTTY - epd is not a valid endpoint descriptor + */ +int scif_close(scif_epd_t epd); + +/** + * scif_send() - Send a message + * @epd: endpoint descriptor + * @msg: message buffer address + * @len: message length + * @flags: blocking mode flags + * + * scif_send() sends data to the peer of endpoint epd. Up to len bytes of data + * are copied from memory starting at address msg. On successful execution the + * return value of scif_send() is the number of bytes that were sent, and is + * zero if no bytes were sent because len was zero. scif_send() may be called + * only when the endpoint is in a connected state. + * + * If a scif_send() call is non-blocking, then it sends only those bytes which + * can be sent without waiting, up to a maximum of len bytes. + * + * If a scif_send() call is blocking, then it normally returns after sending + * all len bytes. If a blocking call is interrupted or the connection is + * reset, the call is considered successful if some bytes were sent or len is + * zero, otherwise the call is considered unsuccessful. + * + * In user mode, the select() and poll() functions can be used to determine + * when the send queue is not full. In kernel mode, the scif_poll() function + * may be used for this purpose. + * + * It is recommended that scif_send()/scif_recv() only be used for short + * control-type message communication between SCIF endpoints. The SCIF RMA + * APIs are expected to provide better performance for transfer sizes of + * 1024 bytes or longer for the current MIC hardware and software + * implementation. + * + * scif_send() will block until the entire message is sent if SCIF_SEND_BLOCK + * is passed as the flags argument. + * + * Return: + * Upon successful completion, scif_send() returns the number of bytes sent; + * otherwise in user mode -1 is returned and errno is set to indicate the + * error; in kernel mode the negative of one of the following errors is + * returned. 
+ * + * Errors: + * EBADF, ENOTTY - epd is not a valid endpoint descriptor + * ECONNRESET - Connection reset by peer + * EFAULT - An invalid address was specified for a parameter + * EINVAL - flags is invalid, or len is negative + * ENODEV - The remote node is lost or existed, but is not currently in the + * network since it may have crashed + * ENOMEM - Not enough space + * ENOTCONN - The endpoint is not connected + */ +int scif_send(scif_epd_t epd, void *msg, int len, int flags); + +/** + * scif_recv() - Receive a message + * @epd: endpoint descriptor + * @msg: message buffer address + * @len: message buffer length + * @flags: blocking mode flags + * + * scif_recv() receives data from the peer of endpoint epd. Up to len bytes of + * data are copied to memory starting at address msg. On successful execution + * the return value of scif_recv() is the number of bytes that were received, + * and is zero if no bytes were received because len was zero. scif_recv() may + * be called only when the endpoint is in a connected state. + * + * If a scif_recv() call is non-blocking, then it receives only those bytes + * which can be received without waiting, up to a maximum of len bytes. + * + * If a scif_recv() call is blocking, then it normally returns after receiving + * all len bytes. If the blocking call was interrupted due to a disconnection, + * subsequent calls to scif_recv() will copy all bytes received upto the point + * of disconnection. + * + * In user mode, the select() and poll() functions can be used to determine + * when data is available to be received. In kernel mode, the scif_poll() + * function may be used for this purpose. + * + * It is recommended that scif_send()/scif_recv() only be used for short + * control-type message communication between SCIF endpoints. The SCIF RMA + * APIs are expected to provide better performance for transfer sizes of + * 1024 bytes or longer for the current MIC hardware and software + * implementation. + * + * scif_recv() will block until the entire message is received if + * SCIF_RECV_BLOCK is passed as the flags argument. + * + * Return: + * Upon successful completion, scif_recv() returns the number of bytes + * received; otherwise in user mode -1 is returned and errno is set to + * indicate the error; in kernel mode the negative of one of the following + * errors is returned. + * + * Errors: + * EAGAIN - The destination node is returning from a low power state + * EBADF, ENOTTY - epd is not a valid endpoint descriptor + * ECONNRESET - Connection reset by peer + * EFAULT - An invalid address was specified for a parameter + * EINVAL - flags is invalid, or len is negative + * ENODEV - The remote node is lost or existed, but is not currently in the + * network since it may have crashed + * ENOMEM - Not enough space + * ENOTCONN - The endpoint is not connected + */ +int scif_recv(scif_epd_t epd, void *msg, int len, int flags); + +/** + * scif_register() - Mark a memory region for remote access. + * @epd: endpoint descriptor + * @addr: starting virtual address + * @len: length of range + * @offset: offset of window + * @prot_flags: read/write protection flags + * @map_flags: mapping flags + * + * The scif_register() function opens a window, a range of whole pages of the + * registered address space of the endpoint epd, starting at offset po and + * continuing for len bytes. The value of po, further described below, is a + * function of the parameters offset and len, and the value of map_flags. 
Each + * page of the window represents the physical memory page which backs the + * corresponding page of the range of virtual address pages starting at addr + * and continuing for len bytes. addr and len are constrained to be multiples + * of the page size. A successful scif_register() call returns po. + * + * When SCIF_MAP_FIXED is set in the map_flags argument, po will be offset + * exactly, and offset is constrained to be a multiple of the page size. The + * mapping established by scif_register() will not replace any existing + * registration; an error is returned if any page within the range [offset, + * offset + len - 1] intersects an existing window. + * + * When SCIF_MAP_FIXED is not set, the implementation uses offset in an + * implementation-defined manner to arrive at po. The po value so chosen will + * be an area of the registered address space that the implementation deems + * suitable for a mapping of len bytes. An offset value of 0 is interpreted as + * granting the implementation complete freedom in selecting po, subject to + * constraints described below. A non-zero value of offset is taken to be a + * suggestion of an offset near which the mapping should be placed. When the + * implementation selects a value for po, it does not replace any extant + * window. In all cases, po will be a multiple of the page size. + * + * The physical pages which are so represented by a window are available for + * access in calls to mmap(), scif_readfrom(), scif_writeto(), + * scif_vreadfrom(), and scif_vwriteto(). While a window is registered, the + * physical pages represented by the window will not be reused by the memory + * subsystem for any other purpose. Note that the same physical page may be + * represented by multiple windows. + * + * Subsequent operations which change the memory pages to which virtual + * addresses are mapped (such as mmap(), munmap()) have no effect on + * existing window. + * + * If the process will fork(), it is recommended that the registered + * virtual address range be marked with MADV_DONTFORK. Doing so will prevent + * problems due to copy-on-write semantics. + * + * The prot_flags argument is formed by OR'ing together one or more of the + * following values. + * SCIF_PROT_READ - allow read operations from the window + * SCIF_PROT_WRITE - allow write operations to the window + * + * The map_flags argument can be set to SCIF_MAP_FIXED which interprets a + * fixed offset. + * + * Return: + * Upon successful completion, scif_register() returns the offset at which the + * mapping was placed (po); otherwise in user mode SCIF_REGISTER_FAILED (that + * is (off_t *)-1) is returned and errno is set to indicate the error; in + * kernel mode the negative of one of the following errors is returned. 
+ * + * Errors: + * EADDRINUSE - SCIF_MAP_FIXED is set in map_flags, and pages in the range + * [offset, offset + len -1] are already registered + * EAGAIN - The mapping could not be performed due to lack of resources + * EBADF, ENOTTY - epd is not a valid endpoint descriptor + * ECONNRESET - Connection reset by peer + * EFAULT - Addresses in the range [addr, addr + len - 1] are invalid + * EINVAL - map_flags is invalid, or prot_flags is invalid, or SCIF_MAP_FIXED is + * set in flags, and offset is not a multiple of the page size, or addr is not a + * multiple of the page size, or len is not a multiple of the page size, or is + * 0, or offset is negative + * ENODEV - The remote node is lost or existed, but is not currently in the + * network since it may have crashed + * ENOMEM - Not enough space + * ENOTCONN -The endpoint is not connected + */ +off_t scif_register(scif_epd_t epd, void *addr, size_t len, off_t offset, + int prot_flags, int map_flags); + +/** + * scif_unregister() - Mark a memory region for remote access. + * @epd: endpoint descriptor + * @offset: start of range to unregister + * @len: length of range to unregister + * + * The scif_unregister() function closes those previously registered windows + * which are entirely within the range [offset, offset + len - 1]. It is an + * error to specify a range which intersects only a subrange of a window. + * + * On a successful return, pages within the window may no longer be specified + * in calls to mmap(), scif_readfrom(), scif_writeto(), scif_vreadfrom(), + * scif_vwriteto(), scif_get_pages, and scif_fence_signal(). The window, + * however, continues to exist until all previous references against it are + * removed. A window is referenced if there is a mapping to it created by + * mmap(), or if scif_get_pages() was called against the window + * (and the pages have not been returned via scif_put_pages()). A window is + * also referenced while an RMA, in which some range of the window is a source + * or destination, is in progress. Finally a window is referenced while some + * offset in that window was specified to scif_fence_signal(), and the RMAs + * marked by that call to scif_fence_signal() have not completed. While a + * window is in this state, its registered address space pages are not + * available for use in a new registered window. + * + * When all such references to the window have been removed, its references to + * all the physical pages which it represents are removed. Similarly, the + * registered address space pages of the window become available for + * registration in a new window. + * + * Return: + * Upon successful completion, scif_unregister() returns 0; otherwise in user + * mode -1 is returned and errno is set to indicate the error; in kernel mode + * the negative of one of the following errors is returned. In the event of an + * error, no windows are unregistered. 
+ * + * Errors: + * EBADF, ENOTTY - epd is not a valid endpoint descriptor + * ECONNRESET - Connection reset by peer + * EINVAL - the range [offset, offset + len - 1] intersects a subrange of a + * window, or offset is negative + * ENODEV - The remote node is lost or existed, but is not currently in the + * network since it may have crashed + * ENOTCONN - The endpoint is not connected + * ENXIO - Offsets in the range [offset, offset + len - 1] are invalid for the + * registered address space of epd + */ +int scif_unregister(scif_epd_t epd, off_t offset, size_t len); + +/** + * scif_readfrom() - Copy from a remote address space + * @epd: endpoint descriptor + * @loffset: offset in local registered address space to + * which to copy + * @len: length of range to copy + * @roffset: offset in remote registered address space + * from which to copy + * @rma_flags: transfer mode flags + * + * scif_readfrom() copies len bytes from the remote registered address space of + * the peer of endpoint epd, starting at the offset roffset to the local + * registered address space of epd, starting at the offset loffset. + * + * Each of the specified ranges [loffset, loffset + len - 1] and [roffset, + * roffset + len - 1] must be within some registered window or windows of the + * local and remote nodes. A range may intersect multiple registered windows, + * but only if those windows are contiguous in the registered address space. + * + * If rma_flags includes SCIF_RMA_USECPU, then the data is copied using + * programmed read/writes. Otherwise the data is copied using DMA. If rma_- + * flags includes SCIF_RMA_SYNC, then scif_readfrom() will return after the + * transfer is complete. Otherwise, the transfer may be performed asynchron- + * ously. The order in which any two asynchronous RMA operations complete + * is non-deterministic. The synchronization functions, scif_fence_mark()/ + * scif_fence_wait() and scif_fence_signal(), can be used to synchronize to + * the completion of asynchronous RMA operations on the same endpoint. + * + * The DMA transfer of individual bytes is not guaranteed to complete in + * address order. If rma_flags includes SCIF_RMA_ORDERED, then the last + * cacheline or partial cacheline of the source range will become visible on + * the destination node after all other transferred data in the source + * range has become visible on the destination node. + * + * The optimal DMA performance will likely be realized if both + * loffset and roffset are cacheline aligned (are a multiple of 64). Lower + * performance will likely be realized if loffset and roffset are not + * cacheline aligned but are separated by some multiple of 64. The lowest level + * of performance is likely if loffset and roffset are not separated by a + * multiple of 64. + * + * The rma_flags argument is formed by ORing together zero or more of the + * following values. + * SCIF_RMA_USECPU - perform the transfer using the CPU, otherwise use the DMA + * engine. + * SCIF_RMA_SYNC - perform the transfer synchronously, returning after the + * transfer has completed. Passing this flag results in the + * current implementation busy waiting and consuming CPU cycles + * while the DMA transfer is in progress for best performance by + * avoiding the interrupt latency. 
+ * SCIF_RMA_ORDERED - ensure that the last cacheline or partial cacheline of + * the source range becomes visible on the destination node + * after all other transferred data in the source range has + * become visible on the destination + * + * Return: + * Upon successful completion, scif_readfrom() returns 0; otherwise in user + * mode -1 is returned and errno is set to indicate the error; in kernel mode + * the negative of one of the following errors is returned. + * + * Errors: + * EACCESS - Attempt to write to a read-only range + * EBADF, ENOTTY - epd is not a valid endpoint descriptor + * ECONNRESET - Connection reset by peer + * EINVAL - rma_flags is invalid + * ENODEV - The remote node is lost or existed, but is not currently in the + * network since it may have crashed + * ENOTCONN - The endpoint is not connected + * ENXIO - The range [loffset, loffset + len - 1] is invalid for the registered + * address space of epd, or, The range [roffset, roffset + len - 1] is invalid + * for the registered address space of the peer of epd, or loffset or roffset + * is negative + */ +int scif_readfrom(scif_epd_t epd, off_t loffset, size_t len, off_t + roffset, int rma_flags); + +/** + * scif_writeto() - Copy to a remote address space + * @epd: endpoint descriptor + * @loffset: offset in local registered address space + * from which to copy + * @len: length of range to copy + * @roffset: offset in remote registered address space to + * which to copy + * @rma_flags: transfer mode flags + * + * scif_writeto() copies len bytes from the local registered address space of + * epd, starting at the offset loffset to the remote registered address space + * of the peer of endpoint epd, starting at the offset roffset. + * + * Each of the specified ranges [loffset, loffset + len - 1] and [roffset, + * roffset + len - 1] must be within some registered window or windows of the + * local and remote nodes. A range may intersect multiple registered windows, + * but only if those windows are contiguous in the registered address space. + * + * If rma_flags includes SCIF_RMA_USECPU, then the data is copied using + * programmed read/writes. Otherwise the data is copied using DMA. If rma_- + * flags includes SCIF_RMA_SYNC, then scif_writeto() will return after the + * transfer is complete. Otherwise, the transfer may be performed asynchron- + * ously. The order in which any two asynchronous RMA operations complete + * is non-deterministic. The synchronization functions, scif_fence_mark()/ + * scif_fence_wait() and scif_fence_signal(), can be used to synchronize to + * the completion of asynchronous RMA operations on the same endpoint. + * + * The DMA transfer of individual bytes is not guaranteed to complete in + * address order. If rma_flags includes SCIF_RMA_ORDERED, then the last + * cacheline or partial cacheline of the source range will become visible on + * the destination node after all other transferred data in the source + * range has become visible on the destination node. + * + * The optimal DMA performance will likely be realized if both + * loffset and roffset are cacheline aligned (are a multiple of 64). Lower + * performance will likely be realized if loffset and roffset are not cacheline + * aligned but are separated by some multiple of 64. The lowest level of + * performance is likely if loffset and roffset are not separated by a multiple + * of 64. + * + * The rma_flags argument is formed by ORing together zero or more of the + * following values. 
+ * SCIF_RMA_USECPU - perform the transfer using the CPU, otherwise use the DMA + * engine. + * SCIF_RMA_SYNC - perform the transfer synchronously, returning after the + * transfer has completed. Passing this flag results in the + * current implementation busy waiting and consuming CPU cycles + * while the DMA transfer is in progress for best performance by + * avoiding the interrupt latency. + * SCIF_RMA_ORDERED - ensure that the last cacheline or partial cacheline of + * the source range becomes visible on the destination node + * after all other transferred data in the source range has + * become visible on the destination + * + * Return: + * Upon successful completion, scif_readfrom() returns 0; otherwise in user + * mode -1 is returned and errno is set to indicate the error; in kernel mode + * the negative of one of the following errors is returned. + * + * Errors: + * EACCESS - Attempt to write to a read-only range + * EBADF, ENOTTY - epd is not a valid endpoint descriptor + * ECONNRESET - Connection reset by peer + * EINVAL - rma_flags is invalid + * ENODEV - The remote node is lost or existed, but is not currently in the + * network since it may have crashed + * ENOTCONN - The endpoint is not connected + * ENXIO - The range [loffset, loffset + len - 1] is invalid for the registered + * address space of epd, or, The range [roffset , roffset + len -1] is invalid + * for the registered address space of the peer of epd, or loffset or roffset + * is negative + */ +int scif_writeto(scif_epd_t epd, off_t loffset, size_t len, off_t + roffset, int rma_flags); + +/** + * scif_vreadfrom() - Copy from a remote address space + * @epd: endpoint descriptor + * @addr: address to which to copy + * @len: length of range to copy + * @roffset: offset in remote registered address space + * from which to copy + * @rma_flags: transfer mode flags + * + * scif_vreadfrom() copies len bytes from the remote registered address + * space of the peer of endpoint epd, starting at the offset roffset, to local + * memory, starting at addr. + * + * The specified range [roffset, roffset + len - 1] must be within some + * registered window or windows of the remote nodes. The range may + * intersect multiple registered windows, but only if those windows are + * contiguous in the registered address space. + * + * If rma_flags includes SCIF_RMA_USECPU, then the data is copied using + * programmed read/writes. Otherwise the data is copied using DMA. If rma_- + * flags includes SCIF_RMA_SYNC, then scif_vreadfrom() will return after the + * transfer is complete. Otherwise, the transfer may be performed asynchron- + * ously. The order in which any two asynchronous RMA operations complete + * is non-deterministic. The synchronization functions, scif_fence_mark()/ + * scif_fence_wait() and scif_fence_signal(), can be used to synchronize to + * the completion of asynchronous RMA operations on the same endpoint. + * + * The DMA transfer of individual bytes is not guaranteed to complete in + * address order. If rma_flags includes SCIF_RMA_ORDERED, then the last + * cacheline or partial cacheline of the source range will become visible on + * the destination node after all other transferred data in the source + * range has become visible on the destination node. + * + * If rma_flags includes SCIF_RMA_USECACHE, then the physical pages which back + * the specified local memory range may be remain in a pinned state even after + * the specified transfer completes. 
This may reduce overhead if some or all of + * the same virtual address range is referenced in a subsequent call of + * scif_vreadfrom() or scif_vwriteto(). + * + * The optimal DMA performance will likely be realized if both + * addr and roffset are cacheline aligned (are a multiple of 64). Lower + * performance will likely be realized if addr and roffset are not + * cacheline aligned but are separated by some multiple of 64. The lowest level + * of performance is likely if addr and roffset are not separated by a + * multiple of 64. + * + * The rma_flags argument is formed by ORing together zero or more of the + * following values. + * SCIF_RMA_USECPU - perform the transfer using the CPU, otherwise use the DMA + * engine. + * SCIF_RMA_USECACHE - enable registration caching + * SCIF_RMA_SYNC - perform the transfer synchronously, returning after the + * transfer has completed. Passing this flag results in the + * current implementation busy waiting and consuming CPU cycles + * while the DMA transfer is in progress for best performance by + * avoiding the interrupt latency. + * SCIF_RMA_ORDERED - ensure that the last cacheline or partial cacheline of + * the source range becomes visible on the destination node + * after all other transferred data in the source range has + * become visible on the destination + * + * Return: + * Upon successful completion, scif_vreadfrom() returns 0; otherwise in user + * mode -1 is returned and errno is set to indicate the error; in kernel mode + * the negative of one of the following errors is returned. + * + * Errors: + * EACCESS - Attempt to write to a read-only range + * EBADF, ENOTTY - epd is not a valid endpoint descriptor + * ECONNRESET - Connection reset by peer + * EFAULT - Addresses in the range [addr, addr + len - 1] are invalid + * EINVAL - rma_flags is invalid + * ENODEV - The remote node is lost or existed, but is not currently in the + * network since it may have crashed + * ENOTCONN - The endpoint is not connected + * ENXIO - Offsets in the range [roffset, roffset + len - 1] are invalid for the + * registered address space of epd + */ +int scif_vreadfrom(scif_epd_t epd, void *addr, size_t len, off_t roffset, + int rma_flags); + +/** + * scif_vwriteto() - Copy to a remote address space + * @epd: endpoint descriptor + * @addr: address from which to copy + * @len: length of range to copy + * @roffset: offset in remote registered address space to + * which to copy + * @rma_flags: transfer mode flags + * + * scif_vwriteto() copies len bytes from the local memory, starting at addr, to + * the remote registered address space of the peer of endpoint epd, starting at + * the offset roffset. + * + * The specified range [roffset, roffset + len - 1] must be within some + * registered window or windows of the remote nodes. The range may intersect + * multiple registered windows, but only if those windows are contiguous in the + * registered address space. + * + * If rma_flags includes SCIF_RMA_USECPU, then the data is copied using + * programmed read/writes. Otherwise the data is copied using DMA. If rma_- + * flags includes SCIF_RMA_SYNC, then scif_vwriteto() will return after the + * transfer is complete. Otherwise, the transfer may be performed asynchron- + * ously. The order in which any two asynchronous RMA operations complete + * is non-deterministic. The synchronization functions, scif_fence_mark()/ + * scif_fence_wait() and scif_fence_signal(), can be used to synchronize to + * the completion of asynchronous RMA operations on the same endpoint. 
+ * + * The DMA transfer of individual bytes is not guaranteed to complete in + * address order. If rma_flags includes SCIF_RMA_ORDERED, then the last + * cacheline or partial cacheline of the source range will become visible on + * the destination node after all other transferred data in the source + * range has become visible on the destination node. + * + * If rma_flags includes SCIF_RMA_USECACHE, then the physical pages which back + * the specified local memory range may be remain in a pinned state even after + * the specified transfer completes. This may reduce overhead if some or all of + * the same virtual address range is referenced in a subsequent call of + * scif_vreadfrom() or scif_vwriteto(). + * + * The optimal DMA performance will likely be realized if both + * addr and offset are cacheline aligned (are a multiple of 64). Lower + * performance will likely be realized if addr and offset are not cacheline + * aligned but are separated by some multiple of 64. The lowest level of + * performance is likely if addr and offset are not separated by a multiple of + * 64. + * + * The rma_flags argument is formed by ORing together zero or more of the + * following values. + * SCIF_RMA_USECPU - perform the transfer using the CPU, otherwise use the DMA + * engine. + * SCIF_RMA_USECACHE - allow registration caching + * SCIF_RMA_SYNC - perform the transfer synchronously, returning after the + * transfer has completed. Passing this flag results in the + * current implementation busy waiting and consuming CPU cycles + * while the DMA transfer is in progress for best performance by + * avoiding the interrupt latency. + * SCIF_RMA_ORDERED - ensure that the last cacheline or partial cacheline of + * the source range becomes visible on the destination node + * after all other transferred data in the source range has + * become visible on the destination + * + * Return: + * Upon successful completion, scif_vwriteto() returns 0; otherwise in user + * mode -1 is returned and errno is set to indicate the error; in kernel mode + * the negative of one of the following errors is returned. + * + * Errors: + * EACCESS - Attempt to write to a read-only range + * EBADF, ENOTTY - epd is not a valid endpoint descriptor + * ECONNRESET - Connection reset by peer + * EFAULT - Addresses in the range [addr, addr + len - 1] are invalid + * EINVAL - rma_flags is invalid + * ENODEV - The remote node is lost or existed, but is not currently in the + * network since it may have crashed + * ENOTCONN - The endpoint is not connected + * ENXIO - Offsets in the range [roffset, roffset + len - 1] are invalid for the + * registered address space of epd + */ +int scif_vwriteto(scif_epd_t epd, void *addr, size_t len, off_t roffset, + int rma_flags); + +/** + * scif_fence_mark() - Mark previously issued RMAs + * @epd: endpoint descriptor + * @flags: control flags + * @mark: marked value returned as output. + * + * scif_fence_mark() returns after marking the current set of all uncompleted + * RMAs initiated through the endpoint epd or the current set of all + * uncompleted RMAs initiated through the peer of endpoint epd. The RMAs are + * marked with a value returned at mark. The application may subsequently call + * scif_fence_wait(), passing the value returned at mark, to await completion + * of all RMAs so marked. + * + * The flags argument has exactly one of the following values. 
+ * SCIF_FENCE_INIT_SELF - RMA operations initiated through endpoint + * epd are marked + * SCIF_FENCE_INIT_PEER - RMA operations initiated through the peer + * of endpoint epd are marked + * + * Return: + * Upon successful completion, scif_fence_mark() returns 0; otherwise in user + * mode -1 is returned and errno is set to indicate the error; in kernel mode + * the negative of one of the following errors is returned. + * + * Errors: + * EBADF, ENOTTY - epd is not a valid endpoint descriptor + * ECONNRESET - Connection reset by peer + * EINVAL - flags is invalid + * ENODEV - The remote node is lost or existed, but is not currently in the + * network since it may have crashed + * ENOTCONN - The endpoint is not connected + * ENOMEM - Insufficient kernel memory was available + */ +int scif_fence_mark(scif_epd_t epd, int flags, int *mark); + +/** + * scif_fence_wait() - Wait for completion of marked RMAs + * @epd: endpoint descriptor + * @mark: mark request + * + * scif_fence_wait() returns after all RMAs marked with mark have completed. + * The value passed in mark must have been obtained in a previous call to + * scif_fence_mark(). + * + * Return: + * Upon successful completion, scif_fence_wait() returns 0; otherwise in user + * mode -1 is returned and errno is set to indicate the error; in kernel mode + * the negative of one of the following errors is returned. + * + * Errors: + * EBADF, ENOTTY - epd is not a valid endpoint descriptor + * ECONNRESET - Connection reset by peer + * ENODEV - The remote node is lost or existed, but is not currently in the + * network since it may have crashed + * ENOTCONN - The endpoint is not connected + * ENOMEM - Insufficient kernel memory was available + */ +int scif_fence_wait(scif_epd_t epd, int mark); + +/** + * scif_fence_signal() - Request a memory update on completion of RMAs + * @epd: endpoint descriptor + * @loff: local offset + * @lval: local value to write to loffset + * @roff: remote offset + * @rval: remote value to write to roffset + * @flags: flags + * + * scif_fence_signal() returns after marking the current set of all uncompleted + * RMAs initiated through the endpoint epd or marking the current set of all + * uncompleted RMAs initiated through the peer of endpoint epd. + * + * If flags includes SCIF_SIGNAL_LOCAL, then on completion of the RMAs in the + * marked set, lval is written to memory at the address corresponding to offset + * loff in the local registered address space of epd. loff must be within a + * registered window. If flags includes SCIF_SIGNAL_REMOTE, then on completion + * of the RMAs in the marked set, rval is written to memory at the address + * corresponding to offset roff in the remote registered address space of epd. + * roff must be within a remote registered window of the peer of epd. Note + * that any specified offset must be DWORD (4 byte / 32 bit) aligned. + * + * The flags argument is formed by OR'ing together the following. + * Exactly one of the following values. + * SCIF_FENCE_INIT_SELF - RMA operations initiated through endpoint + * epd are marked + * SCIF_FENCE_INIT_PEER - RMA operations initiated through the peer + * of endpoint epd are marked + * One or more of the following values. + * SCIF_SIGNAL_LOCAL - On completion of the marked set of RMAs, write lval to + * memory at the address corresponding to offset loff in the local + * registered address space of epd. 
+ * SCIF_SIGNAL_REMOTE - On completion of the marked set of RMAs, write rval to + * memory at the address corresponding to offset roff in the remote + * registered address space of epd. + * + * Return: + * Upon successful completion, scif_fence_signal() returns 0; otherwise in + * user mode -1 is returned and errno is set to indicate the error; in kernel + * mode the negative of one of the following errors is returned. + * + * Errors: + * EBADF, ENOTTY - epd is not a valid endpoint descriptor + * ECONNRESET - Connection reset by peer + * EINVAL - flags is invalid, or loff or roff are not DWORD aligned + * ENODEV - The remote node is lost or existed, but is not currently in the + * network since it may have crashed + * ENOTCONN - The endpoint is not connected + * ENXIO - loff is invalid for the registered address of epd, or roff is invalid + * for the registered address space, of the peer of epd + */ +int scif_fence_signal(scif_epd_t epd, off_t loff, u64 lval, off_t roff, + u64 rval, int flags); + +/** + * scif_get_node_ids() - Return information about online nodes + * @nodes: array in which to return online node IDs + * @len: number of entries in the nodes array + * @self: address to place the node ID of the local node + * + * scif_get_node_ids() fills in the nodes array with up to len node IDs of the + * nodes in the SCIF network. If there is not enough space in nodes, as + * indicated by the len parameter, only len node IDs are returned in nodes. The + * return value of scif_get_node_ids() is the total number of nodes currently in + * the SCIF network. By checking the return value against the len parameter, + * the user may determine if enough space for nodes was allocated. + * + * The node ID of the local node is returned at self. + * + * Return: + * Upon successful completion, scif_get_node_ids() returns the actual number of + * online nodes in the SCIF network including 'self'; otherwise in user mode + * -1 is returned and errno is set to indicate the error; in kernel mode no + * errors are returned. + * + * Errors: + * EFAULT - Bad address + */ +int scif_get_node_ids(u16 *nodes, int len, u16 *self); + +#endif /* __SCIF_H__ */ diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild index 1a0006a76b00..4ad65eebbff8 100644 --- a/include/uapi/linux/Kbuild +++ b/include/uapi/linux/Kbuild @@ -352,6 +352,7 @@ header-y += rtc.h header-y += rtnetlink.h header-y += scc.h header-y += sched.h +header-y += scif_ioctl.h header-y += screen_info.h header-y += sctp.h header-y += sdla.h diff --git a/include/uapi/linux/scif_ioctl.h b/include/uapi/linux/scif_ioctl.h new file mode 100644 index 000000000000..4a94d917cf99 --- /dev/null +++ b/include/uapi/linux/scif_ioctl.h @@ -0,0 +1,130 @@ +/* + * Intel MIC Platform Software Stack (MPSS) + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright(c) 2014 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Copyright(c) 2014 Intel Corporation. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Intel SCIF driver. + * + */ +/* + * ----------------------------------------- + * SCIF IOCTL interface information + * ----------------------------------------- + */ +#ifndef SCIF_IOCTL_H +#define SCIF_IOCTL_H + +#include + +/** + * struct scif_port_id - SCIF port information + * @node: node on which port resides + * @port: local port number + */ +struct scif_port_id { + __u16 node; + __u16 port; +}; + +/** + * struct scifioctl_connect - used for SCIF_CONNECT IOCTL + * @self: used to read back the assigned port_id + * @peer: destination node and port to connect to + */ +struct scifioctl_connect { + struct scif_port_id self; + struct scif_port_id peer; +}; + +/** + * struct scifioctl_accept - used for SCIF_ACCEPTREQ IOCTL + * @flags: flags + * @peer: global id of peer endpoint + * @endpt: new connected endpoint descriptor + */ +struct scifioctl_accept { + __s32 flags; + struct scif_port_id peer; + __u64 endpt; +}; + +/** + * struct scifioctl_msg - used for SCIF_SEND/SCIF_RECV IOCTL + * @msg: message buffer address + * @len: message length + * @flags: flags + * @out_len: number of bytes sent/received + */ +struct scifioctl_msg { + __u64 msg; + __s32 len; + __s32 flags; + __s32 out_len; +}; + +/** + * struct scifioctl_node_ids - used for SCIF_GET_NODEIDS IOCTL + * @nodes: pointer to an array of node_ids + * @self: ID of the current node + * @len: length of array + */ +struct scifioctl_node_ids { + __u64 nodes; + __u64 self; + __s32 len; +}; + +#define SCIF_BIND _IOWR('s', 1, __u64) +#define SCIF_LISTEN _IOW('s', 2, __s32) +#define SCIF_CONNECT _IOWR('s', 3, struct scifioctl_connect) +#define SCIF_ACCEPTREQ _IOWR('s', 4, struct scifioctl_accept) +#define SCIF_ACCEPTREG _IOWR('s', 5, __u64) +#define SCIF_SEND _IOWR('s', 6, struct scifioctl_msg) +#define SCIF_RECV _IOWR('s', 7, struct scifioctl_msg) +#define SCIF_GET_NODEIDS _IOWR('s', 14, struct scifioctl_node_ids) + +#endif /* SCIF_IOCTL_H */ -- cgit v1.2.3 From c9d5c53db959e587d8b59c6a202e2dca741baac4 Mon Sep 17 00:00:00 2001 From: Sudeep Dutt 
Date: Wed, 29 Apr 2015 05:32:32 -0700 Subject: misc: mic: Common MIC header file changes in preparation for SCIF Update mic_bootparam and define the maximum number of DMA channels Reviewed-by: Nikhil Rao Reviewed-by: Ashutosh Dixit Signed-off-by: Sudeep Dutt Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mic/common/mic_dev.h | 3 +++ include/uapi/linux/mic_common.h | 12 ++++++++++++ 2 files changed, 15 insertions(+) (limited to 'include/uapi/linux') diff --git a/drivers/misc/mic/common/mic_dev.h b/drivers/misc/mic/common/mic_dev.h index 92999c2bbf82..0b58c46045dc 100644 --- a/drivers/misc/mic/common/mic_dev.h +++ b/drivers/misc/mic/common/mic_dev.h @@ -48,4 +48,7 @@ struct mic_mw { #define MIC_VIRTIO_PARAM_DEV_REMOVE 0x1 #define MIC_VIRTIO_PARAM_CONFIG_CHANGED 0x2 +/* Maximum number of DMA channels */ +#define MIC_MAX_DMA_CHAN 4 + #endif diff --git a/include/uapi/linux/mic_common.h b/include/uapi/linux/mic_common.h index 6eb40244e019..302a2ced373c 100644 --- a/include/uapi/linux/mic_common.h +++ b/include/uapi/linux/mic_common.h @@ -80,6 +80,12 @@ struct mic_device_ctrl { * @h2c_config_db: Host to Card Virtio config doorbell set by card * @shutdown_status: Card shutdown status set by card * @shutdown_card: Set to 1 by the host when a card shutdown is initiated + * @tot_nodes: Total number of nodes in the SCIF network + * @node_id: Unique id of the node + * @h2c_scif_db - Host to card SCIF doorbell set by card + * @c2h_scif_db - Card to host SCIF doorbell set by host + * @scif_host_dma_addr - SCIF host queue pair DMA address + * @scif_card_dma_addr - SCIF card queue pair DMA address */ struct mic_bootparam { __le32 magic; @@ -88,6 +94,12 @@ struct mic_bootparam { __s8 h2c_config_db; __u8 shutdown_status; __u8 shutdown_card; + __u8 tot_nodes; + __u8 node_id; + __u8 h2c_scif_db; + __u8 c2h_scif_db; + __u64 scif_host_dma_addr; + __u64 scif_card_dma_addr; } __attribute__ ((aligned(8))); /** -- cgit v1.2.3 From cd8dc0548511efff7a97d978f989ce67a883f9a5 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Sat, 11 Apr 2015 18:07:57 -0700 Subject: Drivers: hv: vss: full handshake support Introduce VSS_OP_REGISTER1 to support kernel replying to the negotiation message with its own version. Signed-off-by: Vitaly Kuznetsov Tested-by: Alex Ng Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- drivers/hv/hv_snapshot.c | 49 ++++++++++++++++++++++++++++++++++----------- include/uapi/linux/hyperv.h | 5 +++++ tools/hv/hv_vss_daemon.c | 14 +++++++++++++ 3 files changed, 56 insertions(+), 12 deletions(-) (limited to 'include/uapi/linux') diff --git a/drivers/hv/hv_snapshot.c b/drivers/hv/hv_snapshot.c index 2c8c246d09eb..ee1762b39bf2 100644 --- a/drivers/hv/hv_snapshot.c +++ b/drivers/hv/hv_snapshot.c @@ -59,6 +59,11 @@ static struct { static void vss_respond_to_host(int error); +/* + * This state maintains the version number registered by the daemon. 
+ */ +static int dm_reg_value; + static const char vss_devname[] = "vmbus/hv_vss"; static __u8 *recv_buffer; static struct hvutil_transport *hvt; @@ -89,6 +94,29 @@ static void vss_timeout_func(struct work_struct *dummy) hv_vss_onchannelcallback); } +static int vss_handle_handshake(struct hv_vss_msg *vss_msg) +{ + u32 our_ver = VSS_OP_REGISTER1; + + switch (vss_msg->vss_hdr.operation) { + case VSS_OP_REGISTER: + /* Daemon doesn't expect us to reply */ + dm_reg_value = VSS_OP_REGISTER; + break; + case VSS_OP_REGISTER1: + /* Daemon expects us to reply with our own version*/ + if (hvutil_transport_send(hvt, &our_ver, sizeof(our_ver))) + return -EFAULT; + dm_reg_value = VSS_OP_REGISTER1; + break; + default: + return -EINVAL; + } + vss_transaction.state = HVUTIL_READY; + pr_info("VSS daemon registered\n"); + return 0; +} + static int vss_on_msg(void *msg, int len) { struct hv_vss_msg *vss_msg = (struct hv_vss_msg *)msg; @@ -96,18 +124,15 @@ static int vss_on_msg(void *msg, int len) if (len != sizeof(*vss_msg)) return -EINVAL; - /* - * Don't process registration messages if we're in the middle of - * a transaction processing. - */ - if (vss_transaction.state > HVUTIL_READY && - vss_msg->vss_hdr.operation == VSS_OP_REGISTER) - return -EINVAL; - - if (vss_transaction.state == HVUTIL_DEVICE_INIT && - vss_msg->vss_hdr.operation == VSS_OP_REGISTER) { - pr_info("VSS daemon registered\n"); - vss_transaction.state = HVUTIL_READY; + if (vss_msg->vss_hdr.operation == VSS_OP_REGISTER || + vss_msg->vss_hdr.operation == VSS_OP_REGISTER1) { + /* + * Don't process registration messages if we're in the middle + * of a transaction processing. + */ + if (vss_transaction.state > HVUTIL_READY) + return -EINVAL; + return vss_handle_handshake(vss_msg); } else if (vss_transaction.state == HVUTIL_USERSPACE_REQ) { vss_transaction.state = HVUTIL_USERSPACE_RECV; if (cancel_delayed_work_sync(&vss_timeout_work)) { diff --git a/include/uapi/linux/hyperv.h b/include/uapi/linux/hyperv.h index bb1cb73c927a..66c76df2c32d 100644 --- a/include/uapi/linux/hyperv.h +++ b/include/uapi/linux/hyperv.h @@ -45,6 +45,11 @@ #define VSS_OP_REGISTER 128 +/* + Daemon code with full handshake support. + */ +#define VSS_OP_REGISTER1 129 + enum hv_vss_op { VSS_OP_CREATE = 0, VSS_OP_DELETE, diff --git a/tools/hv/hv_vss_daemon.c b/tools/hv/hv_vss_daemon.c index 36f18211afa7..96234b638249 100644 --- a/tools/hv/hv_vss_daemon.c +++ b/tools/hv/hv_vss_daemon.c @@ -148,6 +148,8 @@ int main(int argc, char *argv[]) int op; struct hv_vss_msg vss_msg[1]; int daemonize = 1, long_index = 0, opt; + int in_handshake = 1; + __u32 kernel_modver; static struct option long_options[] = { {"help", no_argument, 0, 'h' }, @@ -211,6 +213,18 @@ int main(int argc, char *argv[]) len = read(vss_fd, vss_msg, sizeof(struct hv_vss_msg)); + if (in_handshake) { + if (len != sizeof(kernel_modver)) { + syslog(LOG_ERR, "invalid version negotiation"); + exit(EXIT_FAILURE); + } + kernel_modver = *(__u32 *)vss_msg; + in_handshake = 0; + syslog(LOG_INFO, "VSS: kernel module version: %d", + kernel_modver); + continue; + } + if (len != sizeof(struct hv_vss_msg)) { syslog(LOG_ERR, "read failed; error:%d %s", errno, strerror(errno)); -- cgit v1.2.3 From a4d1ee5b0255a135fead1d62a7fc7e6fe718b66e Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Sat, 11 Apr 2015 18:07:58 -0700 Subject: Drivers: hv: fcopy: full handshake support Introduce FCOPY_VERSION_1 to support kernel replying to the negotiation message with its own version. 
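For illustration only (not part of these commits): a rough C sketch of the daemon side of the two-step handshake that the VSS change above and the fcopy change below implement. The device path and the pwrite()-based registration mirror the existing hv_fcopy_daemon.c code but are assumptions here, not taken from this diff.

/* Hypothetical daemon-side handshake: announce our version, read the kernel's. */
#include <fcntl.h>
#include <unistd.h>
#include <linux/types.h>
#include <linux/hyperv.h>       /* FCOPY_VERSION_1 */

static int fcopy_handshake(void)
{
        int fd, version = FCOPY_VERSION_1;
        __u32 kernel_modver;

        fd = open("/dev/vmbus/hv_fcopy", O_RDWR);       /* assumed device node */
        if (fd < 0)
                return -1;
        /* Step 1: register, telling the kernel which protocol version we speak. */
        if (pwrite(fd, &version, sizeof(version), 0) != sizeof(version))
                goto err;
        /* Step 2: a VERSION_1-aware kernel replies with its own module version. */
        if (pread(fd, &kernel_modver, sizeof(kernel_modver), 0) != sizeof(kernel_modver))
                goto err;
        return fd;      /* handshake done; later reads carry fcopy/VSS requests */
err:
        close(fd);
        return -1;
}

An old (VSS_OP_REGISTER / FCOPY_VERSION_0) daemon skips step 2, which is why the kernel records the registered version in dm_reg_value and only replies when the new opcode is used.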
Signed-off-by: Vitaly Kuznetsov Tested-by: Alex Ng Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- drivers/hv/hv_fcopy.c | 16 +++++++++++++++- include/uapi/linux/hyperv.h | 3 ++- tools/hv/hv_fcopy_daemon.c | 15 +++++++++++++++ 3 files changed, 32 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/drivers/hv/hv_fcopy.c b/drivers/hv/hv_fcopy.c index 6a8ec9fb0869..b7b528cd481b 100644 --- a/drivers/hv/hv_fcopy.c +++ b/drivers/hv/hv_fcopy.c @@ -62,6 +62,10 @@ static DECLARE_WORK(fcopy_send_work, fcopy_send_data); static const char fcopy_devname[] = "vmbus/hv_fcopy"; static u8 *recv_buffer; static struct hvutil_transport *hvt; +/* + * This state maintains the version number registered by the daemon. + */ +static int dm_reg_value; static void fcopy_timeout_func(struct work_struct *dummy) { @@ -81,8 +85,18 @@ static void fcopy_timeout_func(struct work_struct *dummy) static int fcopy_handle_handshake(u32 version) { + u32 our_ver = FCOPY_CURRENT_VERSION; + switch (version) { - case FCOPY_CURRENT_VERSION: + case FCOPY_VERSION_0: + /* Daemon doesn't expect us to reply */ + dm_reg_value = version; + break; + case FCOPY_VERSION_1: + /* Daemon expects us to reply with our own version */ + if (hvutil_transport_send(hvt, &our_ver, sizeof(our_ver))) + return -EFAULT; + dm_reg_value = version; break; default: /* diff --git a/include/uapi/linux/hyperv.h b/include/uapi/linux/hyperv.h index 66c76df2c32d..e4c0a35d6417 100644 --- a/include/uapi/linux/hyperv.h +++ b/include/uapi/linux/hyperv.h @@ -105,7 +105,8 @@ struct hv_vss_msg { */ #define FCOPY_VERSION_0 0 -#define FCOPY_CURRENT_VERSION FCOPY_VERSION_0 +#define FCOPY_VERSION_1 1 +#define FCOPY_CURRENT_VERSION FCOPY_VERSION_1 #define W_MAX_PATH 260 enum hv_fcopy_op { diff --git a/tools/hv/hv_fcopy_daemon.c b/tools/hv/hv_fcopy_daemon.c index 9445d8f264a4..5480e4e424eb 100644 --- a/tools/hv/hv_fcopy_daemon.c +++ b/tools/hv/hv_fcopy_daemon.c @@ -137,6 +137,8 @@ int main(int argc, char *argv[]) int version = FCOPY_CURRENT_VERSION; char *buffer[4096 * 2]; struct hv_fcopy_hdr *in_msg; + int in_handshake = 1; + __u32 kernel_modver; static struct option long_options[] = { {"help", no_argument, 0, 'h' }, @@ -191,6 +193,19 @@ int main(int argc, char *argv[]) syslog(LOG_ERR, "pread failed: %s", strerror(errno)); exit(EXIT_FAILURE); } + + if (in_handshake) { + if (len != sizeof(kernel_modver)) { + syslog(LOG_ERR, "invalid version negotiation"); + exit(EXIT_FAILURE); + } + kernel_modver = *(__u32 *)buffer; + in_handshake = 0; + syslog(LOG_INFO, "HV_FCOPY: kernel module version: %d", + kernel_modver); + continue; + } + in_msg = (struct hv_fcopy_hdr *)buffer; switch (in_msg->operation) { -- cgit v1.2.3 From 113c62ee49d212ecb934147c6ba84cfa79c26121 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 11 May 2015 10:23:38 +0900 Subject: tty: fix comment of ASYNCB_SPD_HI This comment does not reflect the actual code. It should be 57600, not 56000. 
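Purely as illustration (not part of this commit): how the ASYNC_SPD_* flags this comment documents are typically driven from user space; with ASYNC_SPD_HI set, a termios request for B38400 runs the port at 57600 baud. The device path is left to the caller and the helper name is made up here.

/* Hypothetical helper: ask the driver to substitute 57600 for B38400. */
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/serial.h>       /* struct serial_struct, ASYNC_SPD_* */

static int set_spd_hi(const char *dev)
{
        struct serial_struct ss;
        int fd = open(dev, O_RDWR | O_NOCTTY);

        if (fd < 0)
                return -1;
        if (ioctl(fd, TIOCGSERIAL, &ss) < 0)
                goto err;
        ss.flags = (ss.flags & ~ASYNC_SPD_MASK) | ASYNC_SPD_HI;
        if (ioctl(fd, TIOCSSERIAL, &ss) < 0)
                goto err;
        return fd;      /* caller then selects B38400 via tcsetattr() */
err:
        close(fd);
        return -1;
}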
Signed-off-by: Masahiro Yamada Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/tty_flags.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/tty_flags.h b/include/uapi/linux/tty_flags.h index fae4864737fa..072e41e45ee2 100644 --- a/include/uapi/linux/tty_flags.h +++ b/include/uapi/linux/tty_flags.h @@ -15,7 +15,7 @@ #define ASYNCB_FOURPORT 1 /* Set OU1, OUT2 per AST Fourport settings */ #define ASYNCB_SAK 2 /* Secure Attention Key (Orange book) */ #define ASYNCB_SPLIT_TERMIOS 3 /* [x] Separate termios for dialin/callout */ -#define ASYNCB_SPD_HI 4 /* Use 56000 instead of 38400 bps */ +#define ASYNCB_SPD_HI 4 /* Use 57600 instead of 38400 bps */ #define ASYNCB_SPD_VHI 5 /* Use 115200 instead of 38400 bps */ #define ASYNCB_SKIP_TEST 6 /* Skip UART test during autoconfiguration */ #define ASYNCB_AUTO_IRQ 7 /* Do automatic IRQ during -- cgit v1.2.3 From d52d3997f843ffefaa8d8462790ffcaca6c74192 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Fri, 22 May 2015 20:56:06 -0700 Subject: ipv6: Create percpu rt6_info After the patch 'ipv6: Only create RTF_CACHE routes after encountering pmtu exception', we need to compensate the performance hit (bouncing dst->__refcnt). Signed-off-by: Martin KaFai Lau Cc: Hannes Frederic Sowa Cc: Steffen Klassert Cc: Julian Anastasov Signed-off-by: David S. Miller --- include/net/ip6_fib.h | 3 +- include/uapi/linux/ipv6_route.h | 1 + net/ipv6/ip6_fib.c | 24 +++++++- net/ipv6/route.c | 132 +++++++++++++++++++++++++++++++++++----- 4 files changed, 142 insertions(+), 18 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index cc8f03c10c43..3b76849c190f 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -124,6 +124,7 @@ struct rt6_info { struct uncached_list *rt6i_uncached_list; struct inet6_dev *rt6i_idev; + struct rt6_info * __percpu *rt6i_pcpu; u32 rt6i_metric; u32 rt6i_pmtu; @@ -164,7 +165,7 @@ static inline void rt6_update_expires(struct rt6_info *rt0, int timeout) static inline u32 rt6_get_cookie(const struct rt6_info *rt) { - if (unlikely(rt->dst.flags & DST_NOCACHE)) + if (rt->rt6i_flags & RTF_PCPU || unlikely(rt->dst.flags & DST_NOCACHE)) rt = (struct rt6_info *)(rt->dst.from); return rt->rt6i_node ? 
rt->rt6i_node->fn_sernum : 0; diff --git a/include/uapi/linux/ipv6_route.h b/include/uapi/linux/ipv6_route.h index 2be7bd174751..f6598d1c886e 100644 --- a/include/uapi/linux/ipv6_route.h +++ b/include/uapi/linux/ipv6_route.h @@ -34,6 +34,7 @@ #define RTF_PREF(pref) ((pref) << 27) #define RTF_PREF_MASK 0x18000000 +#define RTF_PCPU 0x40000000 #define RTF_LOCAL 0x80000000 diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 83341b3a248d..55d19861ab20 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -154,10 +154,32 @@ static void node_free(struct fib6_node *fn) kmem_cache_free(fib6_node_kmem, fn); } +static void rt6_free_pcpu(struct rt6_info *non_pcpu_rt) +{ + int cpu; + + if (!non_pcpu_rt->rt6i_pcpu) + return; + + for_each_possible_cpu(cpu) { + struct rt6_info **ppcpu_rt; + struct rt6_info *pcpu_rt; + + ppcpu_rt = per_cpu_ptr(non_pcpu_rt->rt6i_pcpu, cpu); + pcpu_rt = *ppcpu_rt; + if (pcpu_rt) { + dst_free(&pcpu_rt->dst); + *ppcpu_rt = NULL; + } + } +} + static void rt6_release(struct rt6_info *rt) { - if (atomic_dec_and_test(&rt->rt6i_ref)) + if (atomic_dec_and_test(&rt->rt6i_ref)) { + rt6_free_pcpu(rt); dst_free(&rt->dst); + } } static void fib6_link_table(struct net *net, struct fib6_table *tb) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 90c8eaa24565..1a1122a6bbf5 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -165,11 +165,18 @@ static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev) } } +static u32 *rt6_pcpu_cow_metrics(struct rt6_info *rt) +{ + return dst_metrics_write_ptr(rt->dst.from); +} + static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old) { struct rt6_info *rt = (struct rt6_info *)dst; - if (rt->rt6i_flags & RTF_CACHE) + if (rt->rt6i_flags & RTF_PCPU) + return rt6_pcpu_cow_metrics(rt); + else if (rt->rt6i_flags & RTF_CACHE) return NULL; else return dst_cow_metrics_generic(dst, old); @@ -309,10 +316,10 @@ static const struct rt6_info ip6_blk_hole_entry_template = { #endif /* allocate dst with ip6_dst_ops */ -static inline struct rt6_info *ip6_dst_alloc(struct net *net, - struct net_device *dev, - int flags, - struct fib6_table *table) +static struct rt6_info *__ip6_dst_alloc(struct net *net, + struct net_device *dev, + int flags, + struct fib6_table *table) { struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev, 0, DST_OBSOLETE_FORCE_CHK, flags); @@ -327,6 +334,34 @@ static inline struct rt6_info *ip6_dst_alloc(struct net *net, return rt; } +static struct rt6_info *ip6_dst_alloc(struct net *net, + struct net_device *dev, + int flags, + struct fib6_table *table) +{ + struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags, table); + + if (rt) { + rt->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_ATOMIC); + if (rt->rt6i_pcpu) { + int cpu; + + for_each_possible_cpu(cpu) { + struct rt6_info **p; + + p = per_cpu_ptr(rt->rt6i_pcpu, cpu); + /* no one shares rt */ + *p = NULL; + } + } else { + dst_destroy((struct dst_entry *)rt); + return NULL; + } + } + + return rt; +} + static void ip6_dst_destroy(struct dst_entry *dst) { struct rt6_info *rt = (struct rt6_info *)dst; @@ -335,6 +370,9 @@ static void ip6_dst_destroy(struct dst_entry *dst) dst_destroy_metrics_generic(dst); + if (rt->rt6i_pcpu) + free_percpu(rt->rt6i_pcpu); + rt6_uncached_list_del(rt); idev = rt->rt6i_idev; @@ -912,11 +950,11 @@ static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort, * Clone the route. 
*/ - if (ort->rt6i_flags & RTF_CACHE) + if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU)) ort = (struct rt6_info *)ort->dst.from; - rt = ip6_dst_alloc(dev_net(ort->dst.dev), ort->dst.dev, - 0, ort->rt6i_table); + rt = __ip6_dst_alloc(dev_net(ort->dst.dev), ort->dst.dev, + 0, ort->rt6i_table); if (!rt) return NULL; @@ -943,6 +981,54 @@ static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort, return rt; } +static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt) +{ + struct rt6_info *pcpu_rt; + + pcpu_rt = __ip6_dst_alloc(dev_net(rt->dst.dev), + rt->dst.dev, rt->dst.flags, + rt->rt6i_table); + + if (!pcpu_rt) + return NULL; + ip6_rt_copy_init(pcpu_rt, rt); + pcpu_rt->rt6i_protocol = rt->rt6i_protocol; + pcpu_rt->rt6i_flags |= RTF_PCPU; + return pcpu_rt; +} + +/* It should be called with read_lock_bh(&tb6_lock) acquired */ +static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt) +{ + struct rt6_info *pcpu_rt, *prev, **p; + + p = this_cpu_ptr(rt->rt6i_pcpu); + pcpu_rt = *p; + + if (pcpu_rt) + goto done; + + pcpu_rt = ip6_rt_pcpu_alloc(rt); + if (!pcpu_rt) { + struct net *net = dev_net(rt->dst.dev); + + pcpu_rt = net->ipv6.ip6_null_entry; + goto done; + } + + prev = cmpxchg(p, NULL, pcpu_rt); + if (prev) { + /* If someone did it before us, return prev instead */ + dst_destroy(&pcpu_rt->dst); + pcpu_rt = prev; + } + +done: + dst_hold(&pcpu_rt->dst); + rt6_dst_from_metrics_check(pcpu_rt); + return pcpu_rt; +} + static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif, struct flowi6 *fl6, int flags) { @@ -975,11 +1061,13 @@ redo_rt6_select: } } - dst_use(&rt->dst, jiffies); - read_unlock_bh(&table->tb6_lock); if (rt == net->ipv6.ip6_null_entry || (rt->rt6i_flags & RTF_CACHE)) { - goto done; + dst_use(&rt->dst, jiffies); + read_unlock_bh(&table->tb6_lock); + + rt6_dst_from_metrics_check(rt); + return rt; } else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) && !(rt->rt6i_flags & RTF_GATEWAY))) { /* Create a RTF_CACHE clone which will not be @@ -990,6 +1078,9 @@ redo_rt6_select: struct rt6_info *uncached_rt; + dst_use(&rt->dst, jiffies); + read_unlock_bh(&table->tb6_lock); + uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL); dst_release(&rt->dst); @@ -997,13 +1088,22 @@ redo_rt6_select: rt6_uncached_list_add(uncached_rt); else uncached_rt = net->ipv6.ip6_null_entry; + dst_hold(&uncached_rt->dst); return uncached_rt; - } -done: - rt6_dst_from_metrics_check(rt); - return rt; + } else { + /* Get a percpu copy */ + + struct rt6_info *pcpu_rt; + + rt->dst.lastuse = jiffies; + rt->dst.__use++; + pcpu_rt = rt6_get_pcpu_route(rt); + read_unlock_bh(&table->tb6_lock); + + return pcpu_rt; + } } static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table, @@ -1147,7 +1247,7 @@ static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie) rt6_dst_from_metrics_check(rt); - if (unlikely(dst->flags & DST_NOCACHE)) + if ((rt->rt6i_flags & RTF_PCPU) || unlikely(dst->flags & DST_NOCACHE)) return rt6_dst_from_check(rt, cookie); else return rt6_check(rt, cookie); -- cgit v1.2.3 From 069d4a7b583274e3fd8712c92a035626e0ebf7be Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 3 Mar 2015 11:58:14 +0100 Subject: netfilter: ebtables: fix comment grammar s/stongly inspired on/strongly inspired by/ Signed-off-by: Geert Uytterhoeven Cc: David S. 
Miller Signed-off-by: Jiri Kosina --- include/linux/netfilter_bridge/ebtables.h | 2 +- include/uapi/linux/netfilter_bridge/ebtables.h | 2 +- net/bridge/netfilter/ebtables.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h index 34e7a2b7f867..9ac6f263956b 100644 --- a/include/linux/netfilter_bridge/ebtables.h +++ b/include/linux/netfilter_bridge/ebtables.h @@ -6,7 +6,7 @@ * * ebtables.c,v 2.0, April, 2002 * - * This code is stongly inspired on the iptables code which is + * This code is strongly inspired by the iptables code which is * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling */ #ifndef __LINUX_BRIDGE_EFF_H diff --git a/include/uapi/linux/netfilter_bridge/ebtables.h b/include/uapi/linux/netfilter_bridge/ebtables.h index ba993360dbe9..ab46c805c455 100644 --- a/include/uapi/linux/netfilter_bridge/ebtables.h +++ b/include/uapi/linux/netfilter_bridge/ebtables.h @@ -6,7 +6,7 @@ * * ebtables.c,v 2.0, April, 2002 * - * This code is stongly inspired on the iptables code which is + * This code is strongly inspired by the iptables code which is * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling */ diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index d9a8c05d995d..54df89edcf20 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -6,7 +6,7 @@ * * ebtables.c,v 2.0, July, 2002 * - * This code is stongly inspired on the iptables code which is + * This code is strongly inspired by the iptables code which is * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling * * This program is free software; you can redistribute it and/or -- cgit v1.2.3 From ebddf1a8d78aa3436353fae75c4396e50cb2d6cf Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 26 May 2015 18:41:20 +0200 Subject: netfilter: nf_tables: allow to bind table to net_device This patch adds the internal NFT_AF_NEEDS_DEV flag to indicate that you must attach this table to a net_device. This change is required by the follow up patch that introduces the new netdev table. 
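With this flag in place, a family that wants per-device tables only has to set it in its struct nft_af_info; the core above takes care of parsing NFTA_TABLE_DEV, holding the device reference and propagating table->dev into the chain hook ops. A rough sketch of what the follow-up netdev family could look like (NFPROTO_NETDEV, the ingress hook and nft_do_chain_netdev are placeholder names here, they are only introduced by the later patch):

    static struct nft_af_info nft_af_netdev __read_mostly = {
            .family  = NFPROTO_NETDEV,          /* placeholder, see follow-up */
            .nhooks  = NF_NETDEV_NUMHOOKS,
            .owner   = THIS_MODULE,
            .flags   = NFT_AF_NEEDS_DEV,        /* tables must name a device */
            .nops    = 1,
            .hooks   = {
                    [NF_NETDEV_INGRESS] = nft_do_chain_netdev,
            },
    };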
Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 8 ++++++ include/uapi/linux/netfilter/nf_tables.h | 2 ++ net/netfilter/nf_tables_api.c | 46 ++++++++++++++++++++++++++++---- 3 files changed, 51 insertions(+), 5 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index e6bcf55dcf20..3d6f48ca40a7 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -819,6 +819,7 @@ unsigned int nft_do_chain(struct nft_pktinfo *pkt, * @use: number of chain references to this table * @flags: table flag (see enum nft_table_flags) * @name: name of the table + * @dev: this table is bound to this device (if any) */ struct nft_table { struct list_head list; @@ -828,6 +829,11 @@ struct nft_table { u32 use; u16 flags; char name[NFT_TABLE_MAXNAMELEN]; + struct net_device *dev; +}; + +enum nft_af_flags { + NFT_AF_NEEDS_DEV = (1 << 0), }; /** @@ -838,6 +844,7 @@ struct nft_table { * @nhooks: number of hooks in this family * @owner: module owner * @tables: used internally + * @flags: family flags * @nops: number of hook ops in this family * @hook_ops_init: initialization function for chain hook ops * @hooks: hookfn overrides for packet validation @@ -848,6 +855,7 @@ struct nft_af_info { unsigned int nhooks; struct module *owner; struct list_head tables; + u32 flags; unsigned int nops; void (*hook_ops_init)(struct nf_hook_ops *, unsigned int); diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 5fa1cd04762e..89a671e0f5e7 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -146,12 +146,14 @@ enum nft_table_flags { * @NFTA_TABLE_NAME: name of the table (NLA_STRING) * @NFTA_TABLE_FLAGS: bitmask of enum nft_table_flags (NLA_U32) * @NFTA_TABLE_USE: number of chains in this table (NLA_U32) + * @NFTA_TABLE_DEV: net device name (NLA_STRING) */ enum nft_table_attributes { NFTA_TABLE_UNSPEC, NFTA_TABLE_NAME, NFTA_TABLE_FLAGS, NFTA_TABLE_USE, + NFTA_TABLE_DEV, __NFTA_TABLE_MAX }; #define NFTA_TABLE_MAX (__NFTA_TABLE_MAX - 1) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index ad9d11fb29fd..2fd4e99dd074 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -399,6 +399,8 @@ static const struct nla_policy nft_table_policy[NFTA_TABLE_MAX + 1] = { [NFTA_TABLE_NAME] = { .type = NLA_STRING, .len = NFT_TABLE_MAXNAMELEN - 1 }, [NFTA_TABLE_FLAGS] = { .type = NLA_U32 }, + [NFTA_TABLE_DEV] = { .type = NLA_STRING, + .len = IFNAMSIZ - 1 }, }; static int nf_tables_fill_table_info(struct sk_buff *skb, struct net *net, @@ -423,6 +425,10 @@ static int nf_tables_fill_table_info(struct sk_buff *skb, struct net *net, nla_put_be32(skb, NFTA_TABLE_USE, htonl(table->use))) goto nla_put_failure; + if (table->dev && + nla_put_string(skb, NFTA_TABLE_DEV, table->dev->name)) + goto nla_put_failure; + nlmsg_end(skb, nlh); return 0; @@ -608,6 +614,11 @@ static int nf_tables_updtable(struct nft_ctx *ctx) if (flags == ctx->table->flags) return 0; + if ((ctx->afi->flags & NFT_AF_NEEDS_DEV) && + ctx->nla[NFTA_TABLE_DEV] && + nla_strcmp(ctx->nla[NFTA_TABLE_DEV], ctx->table->dev->name)) + return -EOPNOTSUPP; + trans = nft_trans_alloc(ctx, NFT_MSG_NEWTABLE, sizeof(struct nft_trans_table)); if (trans == NULL) @@ -645,6 +656,7 @@ static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb, struct nft_table *table; struct net *net = sock_net(skb->sk); int family = 
nfmsg->nfgen_family; + struct net_device *dev = NULL; u32 flags = 0; struct nft_ctx ctx; int err; @@ -679,30 +691,50 @@ static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb, return -EINVAL; } + if (afi->flags & NFT_AF_NEEDS_DEV) { + char ifname[IFNAMSIZ]; + + if (!nla[NFTA_TABLE_DEV]) + return -EOPNOTSUPP; + + nla_strlcpy(ifname, nla[NFTA_TABLE_DEV], IFNAMSIZ); + dev = dev_get_by_name(net, ifname); + if (!dev) + return -ENOENT; + } else if (nla[NFTA_TABLE_DEV]) { + return -EOPNOTSUPP; + } + + err = -EAFNOSUPPORT; if (!try_module_get(afi->owner)) - return -EAFNOSUPPORT; + goto err1; err = -ENOMEM; table = kzalloc(sizeof(*table), GFP_KERNEL); if (table == NULL) - goto err1; + goto err2; nla_strlcpy(table->name, name, NFT_TABLE_MAXNAMELEN); INIT_LIST_HEAD(&table->chains); INIT_LIST_HEAD(&table->sets); table->flags = flags; + table->dev = dev; nft_ctx_init(&ctx, skb, nlh, afi, table, NULL, nla); err = nft_trans_table_add(&ctx, NFT_MSG_NEWTABLE); if (err < 0) - goto err2; + goto err3; list_add_tail_rcu(&table->list, &afi->tables); return 0; -err2: +err3: kfree(table); -err1: +err2: module_put(afi->owner); +err1: + if (dev != NULL) + dev_put(dev); + return err; } @@ -806,6 +838,9 @@ static void nf_tables_table_destroy(struct nft_ctx *ctx) { BUG_ON(ctx->table->use > 0); + if (ctx->table->dev) + dev_put(ctx->table->dev); + kfree(ctx->table); module_put(ctx->afi->owner); } @@ -1361,6 +1396,7 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, ops->priority = priority; ops->priv = chain; ops->hook = afi->hooks[ops->hooknum]; + ops->dev = table->dev; if (hookfn) ops->hook = hookfn; if (afi->hook_ops_init) -- cgit v1.2.3 From 8cf6f497de405ca9eb87ffb34d90699962d10125 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Tue, 26 May 2015 08:22:49 -0700 Subject: ethtool: Add helper routines to pass vf to rx_flow_spec The ring_cookie is 64 bits wide which is much larger than can be used for actual queue index values. So provide some helper routines to pack a VF index into the cookie. This is useful to steer packets to a VF ring without having to know the queue layout of the device. CC: Alex Duyck Signed-off-by: John Fastabend Signed-off-by: Jeff Kirsher --- include/uapi/linux/ethtool.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index ae832b45b44c..0594933cdf55 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -796,6 +796,31 @@ struct ethtool_rx_flow_spec { __u32 location; }; +/* How rings are layed out when accessing virtual functions or + * offloaded queues is device specific. To allow users to do flow + * steering and specify these queues the ring cookie is partitioned + * into a 32bit queue index with an 8 bit virtual function id. + * This also leaves the 3bytes for further specifiers. It is possible + * future devices may support more than 256 virtual functions if + * devices start supporting PCIe w/ARI. However at the moment I + * do not know of any devices that support this so I do not reserve + * space for this at this time. If a future patch consumes the next + * byte it should be aware of this possiblity. 
+ */ +#define ETHTOOL_RX_FLOW_SPEC_RING 0x00000000FFFFFFFFLL +#define ETHTOOL_RX_FLOW_SPEC_RING_VF 0x000000FF00000000LL +#define ETHTOOL_RX_FLOW_SPEC_RING_VF_OFF 32 +static inline __u64 ethtool_get_flow_spec_ring(__u64 ring_cookie) +{ + return ETHTOOL_RX_FLOW_SPEC_RING & ring_cookie; +}; + +static inline __u64 ethtool_get_flow_spec_ring_vf(__u64 ring_cookie) +{ + return (ETHTOOL_RX_FLOW_SPEC_RING_VF & ring_cookie) >> + ETHTOOL_RX_FLOW_SPEC_RING_VF_OFF; +}; + /** * struct ethtool_rxnfc - command to get or set RX flow classification rules * @cmd: Specific command number - %ETHTOOL_GRXFH, %ETHTOOL_SRXFH, -- cgit v1.2.3 From 97f411d9175f018503e67e7552f92bc28844001b Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 26 May 2015 07:34:21 -0300 Subject: [media] dvb: split enum from typedefs at frontend.h Using typedefs is already bad enough, but doing it together with enum declaration is even worse. Also, it breaks the scripts at DocBook that would be generating reference pointers for the enums. Well, we can't get rid of typedef right now, but let's at least declare it on a separate line, and let the scripts to generate the cross-reference, as this is needed for the next DocBook patches. Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/dvb/frontend.h | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/dvb/frontend.h b/include/uapi/linux/dvb/frontend.h index 466f56997272..ae481bc53a9c 100644 --- a/include/uapi/linux/dvb/frontend.h +++ b/include/uapi/linux/dvb/frontend.h @@ -36,7 +36,7 @@ typedef enum fe_type { } fe_type_t; -typedef enum fe_caps { +enum fe_caps { FE_IS_STUPID = 0, FE_CAN_INVERSION_AUTO = 0x1, FE_CAN_FEC_1_2 = 0x2, @@ -68,7 +68,9 @@ typedef enum fe_caps { FE_NEEDS_BENDING = 0x20000000, /* not supported anymore, don't use (frontend requires frequency bending) */ FE_CAN_RECOVER = 0x40000000, /* frontend can recover from a cable unplug automatically */ FE_CAN_MUTE_TS = 0x80000000 /* frontend can stop spurious TS data output */ -} fe_caps_t; +}; + +typedef enum fe_caps fe_caps_t; struct dvb_frontend_info { @@ -134,7 +136,7 @@ typedef enum fe_sec_mini_cmd { * to reset DiSEqC, tone and parameters */ -typedef enum fe_status { +enum fe_status { FE_HAS_SIGNAL = 0x01, FE_HAS_CARRIER = 0x02, FE_HAS_VITERBI = 0x04, @@ -142,7 +144,9 @@ typedef enum fe_status { FE_HAS_LOCK = 0x10, FE_TIMEDOUT = 0x20, FE_REINIT = 0x40, -} fe_status_t; +}; + +typedef enum fe_status fe_status_t; typedef enum fe_spectral_inversion { INVERSION_OFF, -- cgit v1.2.3 From d6b6d346e5605ee2af0f0349e71901121b984258 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 26 May 2015 19:33:58 -0300 Subject: [media] DocBook: better document FE_SET_VOLTAGE ioctl Use the proper format for FE_SET_VOLTAGE documentation and fix the documentation. The description for the enum is not 100%, and it is missing the voltage off value. Also, it is better to keep the enum description together with the ioctl, as both are used together. 
Signed-off-by: Mauro Carvalho Chehab --- Documentation/DocBook/media/dvb/fe-set-voltage.xml | 94 ++++++++++++++++++++++ Documentation/DocBook/media/dvb/frontend.xml | 62 +------------- include/uapi/linux/dvb/frontend.h | 6 +- 3 files changed, 99 insertions(+), 63 deletions(-) create mode 100644 Documentation/DocBook/media/dvb/fe-set-voltage.xml (limited to 'include/uapi/linux') diff --git a/Documentation/DocBook/media/dvb/fe-set-voltage.xml b/Documentation/DocBook/media/dvb/fe-set-voltage.xml new file mode 100644 index 000000000000..a1ee5f9c28e0 --- /dev/null +++ b/Documentation/DocBook/media/dvb/fe-set-voltage.xml @@ -0,0 +1,94 @@ + + + ioctl FE_SET_VOLTAGE + &manvol; + + + + FE_SET_VOLTAGE + Allow setting the DC level sent to the antenna subsystem. + + + + + + int ioctl + int fd + int request + &fe-sec-voltage; *voltage + + + + + + Arguments + + + fd + + &fe_fd; + + + + request + + FE_SET_VOLTAGE + + + + pointer to &fe-sec-voltage; + + + + + + Description + +This ioctl allows to set the DC voltage level sent through the antenna + cable to 13V, 18V or off. +Usually, a satellital antenna subsystems require that the digital TV + device to send a DC voltage to feed power to the LNBf. Depending on the + LNBf type, the polarization or the intermediate frequency (IF) of the LNBf + can controlled by the voltage level. Other devices (for example, the ones + that implement DISEqC and multipoint LNBf's don't need to control the + voltage level, provided that either 13V or 18V is sent to power up the + LNBf. +NOTE: if more than one device is connected to the same antenna, + setting a voltage level may interfere on other devices, as they may lose + the capability of setting polarization or IF. So, on those + cases, setting the voltage to SEC_VOLTAGE_OFF while the device is not is + used is recommended. + +&return-value-dvb; + + +
+enum fe_sec_voltage + + + enum fe_status + + &cs-def; + + + ID + Description + + + + + SEC_VOLTAGE_13 + Set DC voltage level to 13V + + SEC_VOLTAGE_18 + Set DC voltage level to 18V + + SEC_VOLTAGE_OFF + Don't send any voltage to the antenna + + + +
+
+ +
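The call documented by the new page is a one-liner from userspace; a minimal sketch, not part of the patch (adapter/frontend numbers assumed, error handling omitted):

    #include <fcntl.h>
    #include <unistd.h>
    #include <sys/ioctl.h>
    #include <linux/dvb/frontend.h>

    int main(void)
    {
            int fd = open("/dev/dvb/adapter0/frontend0", O_RDWR);

            /* 13V feeds the LNBf for vertical/right-hand polarization */
            ioctl(fd, FE_SET_VOLTAGE, SEC_VOLTAGE_13);

            /* ... tune and stream ... */

            /* cut the LNBf power again when the frontend is not in use,
             * as the page above recommends */
            ioctl(fd, FE_SET_VOLTAGE, SEC_VOLTAGE_OFF);
            close(fd);
            return 0;
    }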
diff --git a/Documentation/DocBook/media/dvb/frontend.xml b/Documentation/DocBook/media/dvb/frontend.xml index bb2cd9ef3b03..584c759b6bbe 100644 --- a/Documentation/DocBook/media/dvb/frontend.xml +++ b/Documentation/DocBook/media/dvb/frontend.xml @@ -75,19 +75,6 @@ specification is available at -
-diseqc slave reply -The voltage is usually used with non-DiSEqC capable LNBs to switch the polarzation -(horizontal/vertical). When using DiSEqC epuipment this voltage has to be switched -consistently to the DiSEqC commands as described in the DiSEqC spec. - - typedef enum fe_sec_voltage { - SEC_VOLTAGE_13, - SEC_VOLTAGE_18 - } fe_sec_voltage_t; - -
-
SEC continuous tone @@ -641,54 +628,7 @@ typedef enum fe_hierarchy { &return-value-dvb;
-
-FE_SET_VOLTAGE -DESCRIPTION - - -This call is used to set the bus voltage. This call requires read/write - permissions. - - -SYNOPSIS - - -int ioctl(int fd, int request = FE_SET_VOLTAGE, - fe_sec_voltage_t voltage); - - - -PARAMETERS - - -int fd - -File descriptor returned by a previous call to open(). - - -int request - -Equals FE_SET_VOLTAGE for this command. - - -fe_sec_voltage_t - voltage - -The requested bus voltage. - - - -&return-value-dvb; -
- +&sub-fe-set-voltage; &sub-fe-enable-high-lnb-voltage; &sub-fe-set-frontend-tune-mode; diff --git a/include/uapi/linux/dvb/frontend.h b/include/uapi/linux/dvb/frontend.h index ae481bc53a9c..c1ccbc82024c 100644 --- a/include/uapi/linux/dvb/frontend.h +++ b/include/uapi/linux/dvb/frontend.h @@ -105,11 +105,13 @@ struct dvb_diseqc_slave_reply { }; /* errorcode when no message was received */ -typedef enum fe_sec_voltage { +enum fe_sec_voltage { SEC_VOLTAGE_13, SEC_VOLTAGE_18, SEC_VOLTAGE_OFF -} fe_sec_voltage_t; +}; + +typedef enum fe_sec_voltage fe_sec_voltage_t; typedef enum fe_sec_tone_mode { -- cgit v1.2.3 From 6dc59e7a195fc8852e98d64805f44c46c35e40cd Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Wed, 27 May 2015 07:15:50 -0300 Subject: [media] DocBook: better document FE_SET_TONE ioctl Use the proper format for FE_SET_TONE documentation and improve the documentation. Keep the enum fe_sec_tone_mode description together with the ioctl, as both are used together. Signed-off-by: Mauro Carvalho Chehab --- Documentation/DocBook/media/dvb/fe-set-tone.xml | 88 +++++++++++++++++++++++++ Documentation/DocBook/media/dvb/frontend.xml | 61 +---------------- include/uapi/linux/dvb/frontend.h | 6 +- 3 files changed, 93 insertions(+), 62 deletions(-) create mode 100644 Documentation/DocBook/media/dvb/fe-set-tone.xml (limited to 'include/uapi/linux') diff --git a/Documentation/DocBook/media/dvb/fe-set-tone.xml b/Documentation/DocBook/media/dvb/fe-set-tone.xml new file mode 100644 index 000000000000..b4b1f5303170 --- /dev/null +++ b/Documentation/DocBook/media/dvb/fe-set-tone.xml @@ -0,0 +1,88 @@ + + + ioctl FE_SET_TONE + &manvol; + + + + FE_SET_TONE + Sets/resets the generation of the continuous 22kHz tone. + + + + + + int ioctl + int fd + int request + &fe-sec-tone-mode; *tone + + + + + + Arguments + + + fd + + &fe_fd; + + + + request + + FE_SET_TONE + + + + pointer to &fe-sec-tone-mode; + + + + + + Description + +This ioctl is used to set the generation of the continuous 22kHz tone. + This call requires read/write permissions. +Usually, satellital antenna subsystems require that the digital TV + device to send a 22kHz tone in order to select between high/low band on + some dual-band LNBf. It is also used to send signals to DiSEqC equipment, + but this is done using the DiSEqC ioctls. +NOTE: if more than one device is connected to the same antenna, + setting a tone may interfere on other devices, as they may lose + the capability of selecting the band. So, it is recommended that + applications would change to SEC_TONE_OFF when the device is not used. + +&return-value-dvb; + + +
+enum fe_sec_voltage + + + enum fe_sec_tone_mode + + &cs-def; + + + ID + Description + + + + + SEC_TONE_ON + Sends a 22kHz tone burst to the antenna + + SEC_TONE_OFF + Don't send a 22kHz tone to the antenna + (except if the FE_DISEQC_* ioctls are called) + + + +
+
+ +
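As with FE_SET_VOLTAGE, the ioctl itself is a one-liner; a sketch of selecting the high band of a universal LNBf, reusing the frontend fd from the FE_SET_VOLTAGE sketch above (error handling omitted):

    /* high band (roughly 11.7-12.75 GHz) of a universal LNBf */
    ioctl(fd, FE_SET_TONE, SEC_TONE_ON);

    /* back to the low band; the tone must also be off while
     * DiSEqC commands or bursts are being sent */
    ioctl(fd, FE_SET_TONE, SEC_TONE_OFF);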
diff --git a/Documentation/DocBook/media/dvb/frontend.xml b/Documentation/DocBook/media/dvb/frontend.xml index 584c759b6bbe..f05da4abb3fe 100644 --- a/Documentation/DocBook/media/dvb/frontend.xml +++ b/Documentation/DocBook/media/dvb/frontend.xml @@ -75,21 +75,6 @@ specification is available at -
-SEC continuous tone - -The continuous 22KHz tone is usually used with non-DiSEqC capable LNBs to switch the -high/low band of a dual-band LNB. When using DiSEqC epuipment this voltage has to -be switched consistently to the DiSEqC commands as described in the DiSEqC -spec. - - typedef enum fe_sec_tone_mode { - SEC_TONE_ON, - SEC_TONE_OFF - } fe_sec_tone_mode_t; - -
-
SEC tone burst @@ -582,52 +567,8 @@ typedef enum fe_hierarchy { &return-value-dvb;
-
-FE_SET_TONE -DESCRIPTION - - -This call is used to set the generation of the continuous 22kHz tone. This call - requires read/write permissions. - - -SYNOPSIS - - -int ioctl(int fd, int request = FE_SET_TONE, - fe_sec_tone_mode_t tone); - - -PARAMETERS - - -int fd - -File descriptor returned by a previous call to open(). - - -int request - -Equals FE_SET_TONE for this command. - - -fe_sec_tone_mode_t - tone - -The requested tone generation mode (on/off). - - -&return-value-dvb; -
+&sub-fe-set-tone; &sub-fe-set-voltage; &sub-fe-enable-high-lnb-voltage; &sub-fe-set-frontend-tune-mode; diff --git a/include/uapi/linux/dvb/frontend.h b/include/uapi/linux/dvb/frontend.h index c1ccbc82024c..1a098819473f 100644 --- a/include/uapi/linux/dvb/frontend.h +++ b/include/uapi/linux/dvb/frontend.h @@ -114,10 +114,12 @@ enum fe_sec_voltage { typedef enum fe_sec_voltage fe_sec_voltage_t; -typedef enum fe_sec_tone_mode { +enum fe_sec_tone_mode { SEC_TONE_ON, SEC_TONE_OFF -} fe_sec_tone_mode_t; +}; + +typedef enum fe_sec_tone_mode fe_sec_tone_mode_t; typedef enum fe_sec_mini_cmd { -- cgit v1.2.3 From 81959d996a3b6ea542ebffc7e394530f4638c6ca Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Wed, 27 May 2015 22:20:14 -0300 Subject: [media] DocBook: better document FE_DISEQC_SEND_BURST ioctl Use the proper format for FE_DISEQC_SEND_BURST documentation and improve the documentation. Keep the enum fe_sec_mini_cmd description together with the ioctl, as both are used together. Signed-off-by: Mauro Carvalho Chehab --- .../DocBook/media/dvb/fe-diseqc-send-burst.xml | 86 ++++++++++++++++++++++ Documentation/DocBook/media/dvb/frontend.xml | 67 +---------------- include/uapi/linux/dvb/frontend.h | 6 +- 3 files changed, 91 insertions(+), 68 deletions(-) create mode 100644 Documentation/DocBook/media/dvb/fe-diseqc-send-burst.xml (limited to 'include/uapi/linux') diff --git a/Documentation/DocBook/media/dvb/fe-diseqc-send-burst.xml b/Documentation/DocBook/media/dvb/fe-diseqc-send-burst.xml new file mode 100644 index 000000000000..d1a798048641 --- /dev/null +++ b/Documentation/DocBook/media/dvb/fe-diseqc-send-burst.xml @@ -0,0 +1,86 @@ + + + ioctl FE_DISEQC_SEND_BURST + &manvol; + + + + FE_DISEQC_SEND_BURST + Sends a 22KHz tone burst for 2x1 mini DiSEqC satellite selection. + + + + + + int ioctl + int fd + int request + &fe-sec-mini-cmd; *tone + + + + + + Arguments + + + fd + + &fe_fd; + + + + request + + FE_DISEQC_SEND_BURST + + + + pointer to &fe-sec-mini-cmd; + + + + + + Description + +This ioctl is used to set the generation of a 22kHz tone burst for mini + DiSEqC satellite + selection for 2x1 switches. + This call requires read/write permissions. +It provides support for what's specified at + Digital Satellite Equipment Control + (DiSEqC) - Simple "ToneBurst" Detection Circuit specification. + +&return-value-dvb; + + +
+enum fe_sec_mini_cmd + + + enum fe_sec_tone_mode + + &cs-def; + + + ID + Description + + + + + SEC_MINI_A + Sends a mini-DiSEqC 22kHz '0' Tone Burst to + select satellite-A + + SEC_MINI_B + Sends a mini-DiSEqC 22kHz '1' Data Burst to + select satellite-B + + + +
+
+ +
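A sketch of the usual mini-DiSEqC selection sequence, again reusing the frontend fd from the earlier sketches (the 15 ms settling delays follow common DiSEqC practice rather than anything mandated by this page):

    ioctl(fd, FE_SET_TONE, SEC_TONE_OFF);        /* burst needs the tone off */
    usleep(15 * 1000);
    ioctl(fd, FE_DISEQC_SEND_BURST, SEC_MINI_B); /* pick satellite B */
    usleep(15 * 1000);
    ioctl(fd, FE_SET_TONE, SEC_TONE_ON);         /* restore band selection */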
diff --git a/Documentation/DocBook/media/dvb/frontend.xml b/Documentation/DocBook/media/dvb/frontend.xml index f05da4abb3fe..17050152a48a 100644 --- a/Documentation/DocBook/media/dvb/frontend.xml +++ b/Documentation/DocBook/media/dvb/frontend.xml @@ -75,23 +75,6 @@ specification is available at -
-SEC tone burst - -The 22KHz tone burst is usually used with non-DiSEqC capable switches to select -between two connected LNBs/satellites. When using DiSEqC epuipment this voltage has to -be switched consistently to the DiSEqC commands as described in the DiSEqC -spec. - - typedef enum fe_sec_mini_cmd { - SEC_MINI_A, - SEC_MINI_B - } fe_sec_mini_cmd_t; - - - -
-
frontend spectral inversion The Inversion field can take one of these values: @@ -519,55 +502,7 @@ typedef enum fe_hierarchy { &return-value-dvb;
-
-FE_DISEQC_SEND_BURST -DESCRIPTION - - -This ioctl call is used to send a 22KHz tone burst. - - - -SYNOPSIS - - -int ioctl(int fd, int request = - FE_DISEQC_SEND_BURST, fe_sec_mini_cmd_t burst); - - - -PARAMETERS - - -int fd - -File descriptor returned by a previous call to open(). - - -int request - -Equals FE_DISEQC_SEND_BURST for this command. - - -fe_sec_mini_cmd_t - burst - -burst A or B. - - - -&return-value-dvb; -
- - +&sub-fe-diseqc-send-burst; &sub-fe-set-tone; &sub-fe-set-voltage; &sub-fe-enable-high-lnb-voltage; diff --git a/include/uapi/linux/dvb/frontend.h b/include/uapi/linux/dvb/frontend.h index 1a098819473f..dd64e6d5d881 100644 --- a/include/uapi/linux/dvb/frontend.h +++ b/include/uapi/linux/dvb/frontend.h @@ -122,10 +122,12 @@ enum fe_sec_tone_mode { typedef enum fe_sec_tone_mode fe_sec_tone_mode_t; -typedef enum fe_sec_mini_cmd { +enum fe_sec_mini_cmd { SEC_MINI_A, SEC_MINI_B -} fe_sec_mini_cmd_t; +}; + +typedef enum fe_sec_mini_cmd fe_sec_mini_cmd_t; /** -- cgit v1.2.3 From 997eb9039df27dfd5b1901e26ebae09d5dbe6cff Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Thu, 28 May 2015 17:21:05 -0300 Subject: [media] DocBook: Better document enum fe_modulation Instead of using programlisting, use a table, as this provides a better view of the structure. Signed-off-by: Mauro Carvalho Chehab --- Documentation/DocBook/media/dvb/dvbproperty.xml | 91 +++++++++++++++++++------ Documentation/DocBook/media/dvb/frontend.xml | 26 ------- include/uapi/linux/dvb/frontend.h | 6 +- 3 files changed, 76 insertions(+), 47 deletions(-) (limited to 'include/uapi/linux') diff --git a/Documentation/DocBook/media/dvb/dvbproperty.xml b/Documentation/DocBook/media/dvb/dvbproperty.xml index 0fa4ccfd406d..d9861b54f8c8 100644 --- a/Documentation/DocBook/media/dvb/dvbproperty.xml +++ b/Documentation/DocBook/media/dvb/dvbproperty.xml @@ -137,25 +137,78 @@ get/set up to 64 properties. The actual meaning of each property is described on
<constant>DTV_MODULATION</constant> -Specifies the frontend modulation type for cable and satellite types. The modulation can be one of the types bellow: - - typedef enum fe_modulation { - QPSK, - QAM_16, - QAM_32, - QAM_64, - QAM_128, - QAM_256, - QAM_AUTO, - VSB_8, - VSB_16, - PSK_8, - APSK_16, - APSK_32, - DQPSK, - QAM_4_NR, - } fe_modulation_t; - +Specifies the frontend modulation type for delivery systems that supports + more than one modulation type. The modulation can be one of the types + defined by &fe-modulation;. + + +
+Modulation property + +Most of the digital TV standards currently offers more than one possible + modulation (sometimes called as "constellation" on some standards). This + enum contains the values used by the Kernel. Please notice that not all + modulations are supported by a given standard. + + + enum fe_modulation + + &cs-def; + + + ID + Description + + + + + QPSK + QPSK modulation + + QAM_16 + 16-QAM modulation + + QAM_32 + 32-QAM modulation + + QAM_64 + 64-QAM modulation + + QAM_128 + 128-QAM modulation + + QAM_256 + 256-QAM modulation + + QAM_AUTO + Autodetect QAM modulation + + VSB_8 + 8-VSB modulation + + VSB_16 + 16-VSB modulation + + PSK_8 + 8-PSK modulation + + APSK_16 + 16-APSK modulation + + APSK_32 + 32-APSK modulation + + DQPSK + DQPSK modulation + + QAM_4_NR + 4-QAM-NR modulation + + + +
+
+
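DTV_MODULATION, like the rest of the properties in this section, is set through FE_SET_PROPERTY. A minimal DVB-C tune sketch, not part of the patch (frequency and symbol rate are made-up example values, error handling omitted):

    struct dtv_property props[] = {
            { .cmd = DTV_DELIVERY_SYSTEM, .u.data = SYS_DVBC_ANNEX_A },
            { .cmd = DTV_FREQUENCY,       .u.data = 474000000 },    /* Hz */
            { .cmd = DTV_MODULATION,      .u.data = QAM_64 },
            { .cmd = DTV_SYMBOL_RATE,     .u.data = 6875000 },
            { .cmd = DTV_INNER_FEC,       .u.data = FEC_AUTO },
            { .cmd = DTV_INVERSION,       .u.data = INVERSION_AUTO },
            { .cmd = DTV_TUNE,            .u.data = 0 },
    };
    struct dtv_properties cmdseq = {
            .num   = sizeof(props) / sizeof(props[0]),
            .props = props,
    };

    ioctl(fd, FE_SET_PROPERTY, &cmdseq);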
<constant>DTV_BANDWIDTH_HZ</constant> diff --git a/Documentation/DocBook/media/dvb/frontend.xml b/Documentation/DocBook/media/dvb/frontend.xml index 16a4648043d6..07c1284e88c8 100644 --- a/Documentation/DocBook/media/dvb/frontend.xml +++ b/Documentation/DocBook/media/dvb/frontend.xml @@ -100,32 +100,6 @@ detection.
-
-frontend modulation type for QAM, OFDM and VSB -For cable and terrestrial frontends, e. g. for -struct dvb_qpsk_parameters, -struct dvb_qam_parameters and -struct dvb_qam_parameters, -it needs to specify the quadrature modulation mode which can be one of the following: - - - typedef enum fe_modulation { - QPSK, - QAM_16, - QAM_32, - QAM_64, - QAM_128, - QAM_256, - QAM_AUTO, - VSB_8, - VSB_16, - PSK_8, - APSK_16, - APSK_32, - DQPSK, - } fe_modulation_t; - -
More OFDM parameters diff --git a/include/uapi/linux/dvb/frontend.h b/include/uapi/linux/dvb/frontend.h index dd64e6d5d881..d4b1718046ae 100644 --- a/include/uapi/linux/dvb/frontend.h +++ b/include/uapi/linux/dvb/frontend.h @@ -178,7 +178,7 @@ typedef enum fe_code_rate { } fe_code_rate_t; -typedef enum fe_modulation { +enum fe_modulation { QPSK, QAM_16, QAM_32, @@ -193,7 +193,9 @@ typedef enum fe_modulation { APSK_32, DQPSK, QAM_4_NR, -} fe_modulation_t; +}; + +typedef enum fe_modulation fe_modulation_t; typedef enum fe_transmit_mode { TRANSMISSION_MODE_2K, -- cgit v1.2.3 From 58e11cc3c1f7d7e9fa70ba6c3d363456151fcffd Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Thu, 28 May 2015 20:00:43 -0300 Subject: [media] DocBook: improve documentation for DVB spectral inversion Format it as a table and provide more details. Signed-off-by: Mauro Carvalho Chehab --- Documentation/DocBook/media/dvb/dvbproperty.xml | 52 ++++++++++++++++------ Documentation/DocBook/media/dvb/frontend.xml | 17 ------- .../DocBook/media/dvb/frontend_legacy_api.xml | 2 +- include/uapi/linux/dvb/frontend.h | 5 ++- 4 files changed, 43 insertions(+), 33 deletions(-) (limited to 'include/uapi/linux') diff --git a/Documentation/DocBook/media/dvb/dvbproperty.xml b/Documentation/DocBook/media/dvb/dvbproperty.xml index d9861b54f8c8..41085537acfc 100644 --- a/Documentation/DocBook/media/dvb/dvbproperty.xml +++ b/Documentation/DocBook/media/dvb/dvbproperty.xml @@ -238,19 +238,45 @@ get/set up to 64 properties. The actual meaning of each property is described on
<constant>DTV_INVERSION</constant> - The Inversion field can take one of these values: - - - typedef enum fe_spectral_inversion { - INVERSION_OFF, - INVERSION_ON, - INVERSION_AUTO - } fe_spectral_inversion_t; - - It indicates if spectral inversion should be presumed or not. In the automatic setting - (INVERSION_AUTO) the hardware will try to figure out the correct setting by - itself. - + + Specifies if the frontend should do spectral inversion or not. + +
+enum fe_modulation: Frontend spectral inversion + +This parameter indicates if spectral inversion should be presumed or not. + In the automatic setting (INVERSION_AUTO) the hardware + will try to figure out the correct setting by itself. If the hardware + doesn't support, the DVB core will try to lock at the carrier first with + inversion off. If it fails, it will try to enable inversion. + + + + enum fe_modulation + + &cs-def; + + + ID + Description + + + + + INVERSION_OFF + Don't do spectral band inversion. + + INVERSION_ON + Do spectral band inversion. + + INVERSION_AUTO + Autodetect spectral band inversion. + + + +
+
+
<constant>DTV_DISEQC_MASTER</constant> diff --git a/Documentation/DocBook/media/dvb/frontend.xml b/Documentation/DocBook/media/dvb/frontend.xml index 07c1284e88c8..77dd88ceeedd 100644 --- a/Documentation/DocBook/media/dvb/frontend.xml +++ b/Documentation/DocBook/media/dvb/frontend.xml @@ -56,23 +56,6 @@ specification is available at &sub-dvbproperty; -
-frontend spectral inversion -The Inversion field can take one of these values: - - -typedef enum fe_spectral_inversion { - INVERSION_OFF, - INVERSION_ON, - INVERSION_AUTO -} fe_spectral_inversion_t; - -It indicates if spectral inversion should be presumed or not. In the automatic setting -(INVERSION_AUTO) the hardware will try to figure out the correct setting by -itself. - -
-
frontend code rate The possible values for the fec_inner field used on diff --git a/Documentation/DocBook/media/dvb/frontend_legacy_api.xml b/Documentation/DocBook/media/dvb/frontend_legacy_api.xml index 7d5823858df0..fe1117e91f51 100644 --- a/Documentation/DocBook/media/dvb/frontend_legacy_api.xml +++ b/Documentation/DocBook/media/dvb/frontend_legacy_api.xml @@ -82,7 +82,7 @@ DVB-C2, ISDB, etc. struct dvb_frontend_parameters { uint32_t frequency; /⋆ (absolute) frequency in Hz for QAM/OFDM ⋆/ /⋆ intermediate frequency in kHz for QPSK ⋆/ - fe_spectral_inversion_t inversion; + &fe-spectral-inversion-t; inversion; union { struct dvb_qpsk_parameters qpsk; struct dvb_qam_parameters qam; diff --git a/include/uapi/linux/dvb/frontend.h b/include/uapi/linux/dvb/frontend.h index d4b1718046ae..223905563676 100644 --- a/include/uapi/linux/dvb/frontend.h +++ b/include/uapi/linux/dvb/frontend.h @@ -154,12 +154,13 @@ enum fe_status { typedef enum fe_status fe_status_t; -typedef enum fe_spectral_inversion { +enum fe_spectral_inversion { INVERSION_OFF, INVERSION_ON, INVERSION_AUTO -} fe_spectral_inversion_t; +}; +typedef enum fe_spectral_inversion fe_spectral_inversion_t; typedef enum fe_code_rate { FEC_NONE = 0, -- cgit v1.2.3 From 0577a2f6d84a08da96c908a885db16b4d3532dc4 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Thu, 28 May 2015 20:52:52 -0300 Subject: [media] DocBook: improve documentation for OFDM transmission mode Format it as a table and add more details, in special, for the DTMB modes. Signed-off-by: Mauro Carvalho Chehab --- Documentation/DocBook/media/dvb/dvbproperty.xml | 69 +++++++++++++++++----- Documentation/DocBook/media/dvb/frontend.xml | 15 ----- .../DocBook/media/dvb/frontend_legacy_api.xml | 2 +- include/uapi/linux/dvb/frontend.h | 6 +- 4 files changed, 59 insertions(+), 33 deletions(-) (limited to 'include/uapi/linux') diff --git a/Documentation/DocBook/media/dvb/dvbproperty.xml b/Documentation/DocBook/media/dvb/dvbproperty.xml index 41085537acfc..06a12f1c57c5 100644 --- a/Documentation/DocBook/media/dvb/dvbproperty.xml +++ b/Documentation/DocBook/media/dvb/dvbproperty.xml @@ -827,22 +827,61 @@ typedef enum fe_guard_interval {
<constant>DTV_TRANSMISSION_MODE</constant> - Specifies the number of carriers used by the standard + Specifies the number of carriers used by the standard. + This is used only on OFTM-based standards, e. g. + DVB-T/T2, ISDB-T, DTMB + +
+enum fe_transmit_mode: Number of carriers per channel + + + enum fe_transmit_mode + + &cs-def; + + + ID + Description + + + + + TRANSMISSION_MODE_AUTO + Autodetect transmission mode. The hardware will try to find + the correct FFT-size (if capable) to fill in the missing + parameters. + + TRANSMISSION_MODE_1K + Transmission mode 1K + + TRANSMISSION_MODE_2K + Transmission mode 2K + + TRANSMISSION_MODE_8K + Transmission mode 8K + + TRANSMISSION_MODE_4K + Transmission mode 4K + + TRANSMISSION_MODE_16K + Transmission mode 16K + + TRANSMISSION_MODE_32K + Transmission mode 32K + + TRANSMISSION_MODE_C1 + Single Carrier (C=1) transmission mode (DTMB) + + TRANSMISSION_MODE_C3780 + Multi Carrier (C=3780) transmission mode (DTMB) + + + + +
+
+ - Possible values are: - -typedef enum fe_transmit_mode { - TRANSMISSION_MODE_2K, - TRANSMISSION_MODE_8K, - TRANSMISSION_MODE_AUTO, - TRANSMISSION_MODE_4K, - TRANSMISSION_MODE_1K, - TRANSMISSION_MODE_16K, - TRANSMISSION_MODE_32K, - TRANSMISSION_MODE_C1, - TRANSMISSION_MODE_C3780, -} fe_transmit_mode_t; - Notes: 1) ISDB-T supports three carrier/symbol-size: 8K, 4K, 2K. It is called 'mode' in the standard: Mode 1 is 2K, mode 2 is 4K, mode 3 is 8K diff --git a/Documentation/DocBook/media/dvb/frontend.xml b/Documentation/DocBook/media/dvb/frontend.xml index 77dd88ceeedd..3b6a169ac8f3 100644 --- a/Documentation/DocBook/media/dvb/frontend.xml +++ b/Documentation/DocBook/media/dvb/frontend.xml @@ -87,21 +87,6 @@ detection.
More OFDM parameters -
-Number of carriers per channel - -typedef enum fe_transmit_mode { - TRANSMISSION_MODE_2K, - TRANSMISSION_MODE_8K, - TRANSMISSION_MODE_AUTO, - TRANSMISSION_MODE_4K, - TRANSMISSION_MODE_1K, - TRANSMISSION_MODE_16K, - TRANSMISSION_MODE_32K, - } fe_transmit_mode_t; - -
-
frontend bandwidth diff --git a/Documentation/DocBook/media/dvb/frontend_legacy_api.xml b/Documentation/DocBook/media/dvb/frontend_legacy_api.xml index fe1117e91f51..fa0c6649abfd 100644 --- a/Documentation/DocBook/media/dvb/frontend_legacy_api.xml +++ b/Documentation/DocBook/media/dvb/frontend_legacy_api.xml @@ -139,7 +139,7 @@ struct dvb_vsb_parameters { fe_code_rate_t code_rate_HP; /⋆ high priority stream code rate ⋆/ fe_code_rate_t code_rate_LP; /⋆ low priority stream code rate ⋆/ &fe-modulation-t; constellation; /⋆ modulation type (see above) ⋆/ - fe_transmit_mode_t transmission_mode; + &fe-transmit-mode-t; transmission_mode; fe_guard_interval_t guard_interval; fe_hierarchy_t hierarchy_information; }; diff --git a/include/uapi/linux/dvb/frontend.h b/include/uapi/linux/dvb/frontend.h index 223905563676..c42e6d849f52 100644 --- a/include/uapi/linux/dvb/frontend.h +++ b/include/uapi/linux/dvb/frontend.h @@ -198,7 +198,7 @@ enum fe_modulation { typedef enum fe_modulation fe_modulation_t; -typedef enum fe_transmit_mode { +enum fe_transmit_mode { TRANSMISSION_MODE_2K, TRANSMISSION_MODE_8K, TRANSMISSION_MODE_AUTO, @@ -208,7 +208,9 @@ typedef enum fe_transmit_mode { TRANSMISSION_MODE_32K, TRANSMISSION_MODE_C1, TRANSMISSION_MODE_C3780, -} fe_transmit_mode_t; +}; + +typedef enum fe_transmit_mode fe_transmit_mode_t; #if defined(__DVB_CORE__) || !defined (__KERNEL__) typedef enum fe_bandwidth { -- cgit v1.2.3 From b174fb71e82eef2355aabece4b50fe1540e67544 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Thu, 28 May 2015 20:57:53 -0300 Subject: [media] DocBook: move fe_bandwidth to the frontend legacy section fe_bandwidth/fe_bandwidth_t is used only on DVBv3 API. So, move it to the frontend legacy xml, and convert it into a table. Signed-off-by: Mauro Carvalho Chehab --- Documentation/DocBook/media/dvb/frontend.xml | 15 -------- .../DocBook/media/dvb/frontend_legacy_api.xml | 44 +++++++++++++++++++++- include/uapi/linux/dvb/frontend.h | 6 ++- 3 files changed, 47 insertions(+), 18 deletions(-) (limited to 'include/uapi/linux') diff --git a/Documentation/DocBook/media/dvb/frontend.xml b/Documentation/DocBook/media/dvb/frontend.xml index 3b6a169ac8f3..93d22486f20c 100644 --- a/Documentation/DocBook/media/dvb/frontend.xml +++ b/Documentation/DocBook/media/dvb/frontend.xml @@ -87,21 +87,6 @@ detection.
More OFDM parameters -
-frontend bandwidth - -typedef enum fe_bandwidth { - BANDWIDTH_8_MHZ, - BANDWIDTH_7_MHZ, - BANDWIDTH_6_MHZ, - BANDWIDTH_AUTO, - BANDWIDTH_5_MHZ, - BANDWIDTH_10_MHZ, - BANDWIDTH_1_712_MHZ, -} fe_bandwidth_t; - -
-
frontend guard inverval diff --git a/Documentation/DocBook/media/dvb/frontend_legacy_api.xml b/Documentation/DocBook/media/dvb/frontend_legacy_api.xml index fa0c6649abfd..ed393f22f7a7 100644 --- a/Documentation/DocBook/media/dvb/frontend_legacy_api.xml +++ b/Documentation/DocBook/media/dvb/frontend_legacy_api.xml @@ -60,6 +60,48 @@ supported via the new FE_GET_PROPERTY/FE_GET_SET using the &DTV-DELIVERY-SYSTEM; property.
+
+Frontend bandwidth + + + enum fe_bandwidth + + &cs-def; + + + ID + Description + + + + + BANDWIDTH_AUTO + Autodetect bandwidth (if supported) + + BANDWIDTH_1_712_MHZ + 1.712 MHz + + BANDWIDTH_5_MHZ + 5 MHz + + BANDWIDTH_6_MHZ + 6 MHz + + BANDWIDTH_7_MHZ + 7 MHz + + BANDWIDTH_8_MHZ + 8 MHz + + BANDWIDTH_10_MHZ + 10 MHz + + + + +
+ +
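Since fe_bandwidth now only exists for the DVBv3 calls documented here, a short legacy-API tune sketch may help, not part of the patch (DVB-T, made-up example values, error handling omitted):

    struct dvb_frontend_parameters fep = {
            .frequency = 474000000,             /* Hz for OFDM */
            .inversion = INVERSION_AUTO,
            .u.ofdm = {
                    .bandwidth             = BANDWIDTH_8_MHZ,
                    .code_rate_HP          = FEC_AUTO,
                    .code_rate_LP          = FEC_AUTO,
                    .constellation         = QAM_AUTO,
                    .transmission_mode     = TRANSMISSION_MODE_AUTO,
                    .guard_interval        = GUARD_INTERVAL_AUTO,
                    .hierarchy_information = HIERARCHY_AUTO,
            },
    };

    ioctl(fd, FE_SET_FRONTEND, &fep);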
frontend parameters @@ -135,7 +177,7 @@ struct dvb_vsb_parameters { DVB-T frontends are supported by the dvb_ofdm_parameters structure: struct dvb_ofdm_parameters { - fe_bandwidth_t bandwidth; + &fe-bandwidth-t; bandwidth; fe_code_rate_t code_rate_HP; /⋆ high priority stream code rate ⋆/ fe_code_rate_t code_rate_LP; /⋆ low priority stream code rate ⋆/ &fe-modulation-t; constellation; /⋆ modulation type (see above) ⋆/ diff --git a/include/uapi/linux/dvb/frontend.h b/include/uapi/linux/dvb/frontend.h index c42e6d849f52..43e6faf91849 100644 --- a/include/uapi/linux/dvb/frontend.h +++ b/include/uapi/linux/dvb/frontend.h @@ -213,7 +213,7 @@ enum fe_transmit_mode { typedef enum fe_transmit_mode fe_transmit_mode_t; #if defined(__DVB_CORE__) || !defined (__KERNEL__) -typedef enum fe_bandwidth { +enum fe_bandwidth { BANDWIDTH_8_MHZ, BANDWIDTH_7_MHZ, BANDWIDTH_6_MHZ, @@ -221,7 +221,9 @@ typedef enum fe_bandwidth { BANDWIDTH_5_MHZ, BANDWIDTH_10_MHZ, BANDWIDTH_1_712_MHZ, -} fe_bandwidth_t; +}; + +typedef enum fe_bandwidth fe_bandwidth_t; #endif typedef enum fe_guard_interval { -- cgit v1.2.3 From 2d457b8a9054b9c5b1fcfbc5702b7d0e9f6cda2b Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Thu, 28 May 2015 21:38:44 -0300 Subject: [media] DocBook: improve documentation for FEC fields Format it as a table and add more details. Also, remove the duplicated occurrences. Signed-off-by: Mauro Carvalho Chehab --- Documentation/DocBook/media/dvb/dvbproperty.xml | 123 ++++++++++++--------- Documentation/DocBook/media/dvb/frontend.xml | 28 ----- .../DocBook/media/dvb/frontend_legacy_api.xml | 8 +- include/uapi/linux/dvb/frontend.h | 6 +- 4 files changed, 78 insertions(+), 87 deletions(-) (limited to 'include/uapi/linux') diff --git a/Documentation/DocBook/media/dvb/dvbproperty.xml b/Documentation/DocBook/media/dvb/dvbproperty.xml index 06a12f1c57c5..b96a91a1494d 100644 --- a/Documentation/DocBook/media/dvb/dvbproperty.xml +++ b/Documentation/DocBook/media/dvb/dvbproperty.xml @@ -290,25 +290,70 @@ get/set up to 64 properties. The actual meaning of each property is described on <constant>DTV_INNER_FEC</constant> Used cable/satellite transmissions. The acceptable values are: - -typedef enum fe_code_rate { - FEC_NONE = 0, - FEC_1_2, - FEC_2_3, - FEC_3_4, - FEC_4_5, - FEC_5_6, - FEC_6_7, - FEC_7_8, - FEC_8_9, - FEC_AUTO, - FEC_3_5, - FEC_9_10, - FEC_2_5, -} fe_code_rate_t; - - which correspond to error correction rates of 1/2, 2/3, etc., - no error correction or auto detection. +
+enum fe_code_rate: type of the Forward Error Correction. + + + enum fe_code_rate + + &cs-def; + + + ID + Description + + + + + TRANSMISSION_MODE_AUTO + Autodetect transmission mode. The hardware will try to find + the correct FFT-size (if capable) to fill in the missing + parameters. + + FEC_NONE + No Forward Error Correction Code + + FEC_AUTO + Autodetect Error Correction Code + + FEC_1_2 + Forward Error Correction Code 1/2 + + FEC_2_3 + Forward Error Correction Code 2/3 + + FEC_3_4 + Forward Error Correction Code 3/4 + + FEC_4_5 + Forward Error Correction Code 4/5 + + FEC_5_6 + Forward Error Correction Code 5/6 + + FEC_6_7 + Forward Error Correction Code 6/7 + + FEC_7_8 + Forward Error Correction Code 7/8 + + FEC_8_9 + Forward Error Correction Code 8/9 + + FEC_9_10 + Forward Error Correction Code 9/10 + + FEC_2_5 + Forward Error Correction Code 2/5 + + FEC_3_5 + Forward Error Correction Code 3/5 + + + + +
+
<constant>DTV_VOLTAGE</constant> @@ -757,46 +802,18 @@ typedef enum atscmh_sccc_code_mode {
<constant>DTV_CODE_RATE_HP</constant> - Used on terrestrial transmissions. The acceptable values are: + Used on terrestrial transmissions. The acceptable values are + the ones described at &fe-transmit-mode-t;. - -typedef enum fe_code_rate { - FEC_NONE = 0, - FEC_1_2, - FEC_2_3, - FEC_3_4, - FEC_4_5, - FEC_5_6, - FEC_6_7, - FEC_7_8, - FEC_8_9, - FEC_AUTO, - FEC_3_5, - FEC_9_10, -} fe_code_rate_t; -
<constant>DTV_CODE_RATE_LP</constant> - Used on terrestrial transmissions. The acceptable values are: + Used on terrestrial transmissions. The acceptable values are + the ones described at &fe-transmit-mode-t;. - -typedef enum fe_code_rate { - FEC_NONE = 0, - FEC_1_2, - FEC_2_3, - FEC_3_4, - FEC_4_5, - FEC_5_6, - FEC_6_7, - FEC_7_8, - FEC_8_9, - FEC_AUTO, - FEC_3_5, - FEC_9_10, -} fe_code_rate_t; - +
+
<constant>DTV_GUARD_INTERVAL</constant> diff --git a/Documentation/DocBook/media/dvb/frontend.xml b/Documentation/DocBook/media/dvb/frontend.xml index 93d22486f20c..563800eb1216 100644 --- a/Documentation/DocBook/media/dvb/frontend.xml +++ b/Documentation/DocBook/media/dvb/frontend.xml @@ -56,34 +56,6 @@ specification is available at &sub-dvbproperty; -
-frontend code rate -The possible values for the fec_inner field used on -struct dvb_qpsk_parameters and -struct dvb_qam_parameters are: - - -typedef enum fe_code_rate { - FEC_NONE = 0, - FEC_1_2, - FEC_2_3, - FEC_3_4, - FEC_4_5, - FEC_5_6, - FEC_6_7, - FEC_7_8, - FEC_8_9, - FEC_AUTO, - FEC_3_5, - FEC_9_10, -} fe_code_rate_t; - -which correspond to error correction rates of 1/2, 2/3, etc., no error correction or auto -detection. - -
- -
More OFDM parameters diff --git a/Documentation/DocBook/media/dvb/frontend_legacy_api.xml b/Documentation/DocBook/media/dvb/frontend_legacy_api.xml index ed393f22f7a7..c1dfbd8096bd 100644 --- a/Documentation/DocBook/media/dvb/frontend_legacy_api.xml +++ b/Documentation/DocBook/media/dvb/frontend_legacy_api.xml @@ -145,7 +145,7 @@ OFDM frontends the frequency specifies the absolute frequen struct dvb_qpsk_parameters { uint32_t symbol_rate; /⋆ symbol rate in Symbols per second ⋆/ - fe_code_rate_t fec_inner; /⋆ forward error correction (see above) ⋆/ + &fe-code-rate-t; fec_inner; /⋆ forward error correction (see above) ⋆/ };
@@ -156,7 +156,7 @@ OFDM frontends the frequency specifies the absolute frequen struct dvb_qam_parameters { uint32_t symbol_rate; /⋆ symbol rate in Symbols per second ⋆/ - fe_code_rate_t fec_inner; /⋆ forward error correction (see above) ⋆/ + &fe-code-rate-t; fec_inner; /⋆ forward error correction (see above) ⋆/ &fe-modulation-t; modulation; /⋆ modulation type (see above) ⋆/ }; @@ -178,8 +178,8 @@ struct dvb_vsb_parameters { struct dvb_ofdm_parameters { &fe-bandwidth-t; bandwidth; - fe_code_rate_t code_rate_HP; /⋆ high priority stream code rate ⋆/ - fe_code_rate_t code_rate_LP; /⋆ low priority stream code rate ⋆/ + &fe-code-rate-t; code_rate_HP; /⋆ high priority stream code rate ⋆/ + &fe-code-rate-t; code_rate_LP; /⋆ low priority stream code rate ⋆/ &fe-modulation-t; constellation; /⋆ modulation type (see above) ⋆/ &fe-transmit-mode-t; transmission_mode; fe_guard_interval_t guard_interval; diff --git a/include/uapi/linux/dvb/frontend.h b/include/uapi/linux/dvb/frontend.h index 43e6faf91849..49f6e980125b 100644 --- a/include/uapi/linux/dvb/frontend.h +++ b/include/uapi/linux/dvb/frontend.h @@ -162,7 +162,7 @@ enum fe_spectral_inversion { typedef enum fe_spectral_inversion fe_spectral_inversion_t; -typedef enum fe_code_rate { +enum fe_code_rate { FEC_NONE = 0, FEC_1_2, FEC_2_3, @@ -176,7 +176,9 @@ typedef enum fe_code_rate { FEC_3_5, FEC_9_10, FEC_2_5, -} fe_code_rate_t; +}; + +typedef enum fe_code_rate fe_code_rate_t; enum fe_modulation { -- cgit v1.2.3 From 903142e53c648ee61c00f5c3b420b16bc6336ad7 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Thu, 28 May 2015 22:01:41 -0300 Subject: [media] DocBook: improve documentation for guard interval Format it as a table and add more details, in special for DTMB guard intervals. Signed-off-by: Mauro Carvalho Chehab --- Documentation/DocBook/media/dvb/dvbproperty.xml | 68 +++++++++++++++++----- Documentation/DocBook/media/dvb/frontend.xml | 16 ----- .../DocBook/media/dvb/frontend_legacy_api.xml | 2 +- include/uapi/linux/dvb/frontend.h | 5 +- 4 files changed, 57 insertions(+), 34 deletions(-) (limited to 'include/uapi/linux') diff --git a/Documentation/DocBook/media/dvb/dvbproperty.xml b/Documentation/DocBook/media/dvb/dvbproperty.xml index b96a91a1494d..5f30a28a15b0 100644 --- a/Documentation/DocBook/media/dvb/dvbproperty.xml +++ b/Documentation/DocBook/media/dvb/dvbproperty.xml @@ -818,21 +818,59 @@ typedef enum atscmh_sccc_code_mode { <constant>DTV_GUARD_INTERVAL</constant> Possible values are: - -typedef enum fe_guard_interval { - GUARD_INTERVAL_1_32, - GUARD_INTERVAL_1_16, - GUARD_INTERVAL_1_8, - GUARD_INTERVAL_1_4, - GUARD_INTERVAL_AUTO, - GUARD_INTERVAL_1_128, - GUARD_INTERVAL_19_128, - GUARD_INTERVAL_19_256, - GUARD_INTERVAL_PN420, - GUARD_INTERVAL_PN595, - GUARD_INTERVAL_PN945, -} fe_guard_interval_t; - + +
+Modulation guard interval + + + enum fe_guard_interval + + &cs-def; + + + ID + Description + + + + + GUARD_INTERVAL_AUTO + Autodetect the guard interval + + GUARD_INTERVAL_1_128 + Guard interval 1/128 + + GUARD_INTERVAL_1_32 + Guard interval 1/32 + + GUARD_INTERVAL_1_16 + Guard interval 1/16 + + GUARD_INTERVAL_1_8 + Guard interval 1/8 + + GUARD_INTERVAL_1_4 + Guard interval 1/4 + + GUARD_INTERVAL_19_128 + Guard interval 19/128 + + GUARD_INTERVAL_19_256 + Guard interval 19/256 + + GUARD_INTERVAL_PN420 + PN length 420 (1/4) + + GUARD_INTERVAL_PN595 + PN length 595 (1/6) + + GUARD_INTERVAL_PN945 + PN length 945 (1/9) + + + +
+
Notes: 1) If DTV_GUARD_INTERVAL is set the GUARD_INTERVAL_AUTO the hardware will diff --git a/Documentation/DocBook/media/dvb/frontend.xml b/Documentation/DocBook/media/dvb/frontend.xml index 563800eb1216..a005c4b472f5 100644 --- a/Documentation/DocBook/media/dvb/frontend.xml +++ b/Documentation/DocBook/media/dvb/frontend.xml @@ -59,22 +59,6 @@ specification is available at
More OFDM parameters -
-frontend guard inverval - -typedef enum fe_guard_interval { - GUARD_INTERVAL_1_32, - GUARD_INTERVAL_1_16, - GUARD_INTERVAL_1_8, - GUARD_INTERVAL_1_4, - GUARD_INTERVAL_AUTO, - GUARD_INTERVAL_1_128, - GUARD_INTERVAL_19_128, - GUARD_INTERVAL_19_256, -} fe_guard_interval_t; - -
-
frontend hierarchy diff --git a/Documentation/DocBook/media/dvb/frontend_legacy_api.xml b/Documentation/DocBook/media/dvb/frontend_legacy_api.xml index c1dfbd8096bd..d20f1fd75fa9 100644 --- a/Documentation/DocBook/media/dvb/frontend_legacy_api.xml +++ b/Documentation/DocBook/media/dvb/frontend_legacy_api.xml @@ -182,7 +182,7 @@ struct dvb_vsb_parameters { &fe-code-rate-t; code_rate_LP; /⋆ low priority stream code rate ⋆/ &fe-modulation-t; constellation; /⋆ modulation type (see above) ⋆/ &fe-transmit-mode-t; transmission_mode; - fe_guard_interval_t guard_interval; + &fe-guard-interval-t; guard_interval; fe_hierarchy_t hierarchy_information; }; diff --git a/include/uapi/linux/dvb/frontend.h b/include/uapi/linux/dvb/frontend.h index 49f6e980125b..1d2b7c6dee04 100644 --- a/include/uapi/linux/dvb/frontend.h +++ b/include/uapi/linux/dvb/frontend.h @@ -228,7 +228,7 @@ enum fe_bandwidth { typedef enum fe_bandwidth fe_bandwidth_t; #endif -typedef enum fe_guard_interval { +enum fe_guard_interval { GUARD_INTERVAL_1_32, GUARD_INTERVAL_1_16, GUARD_INTERVAL_1_8, @@ -240,8 +240,9 @@ typedef enum fe_guard_interval { GUARD_INTERVAL_PN420, GUARD_INTERVAL_PN595, GUARD_INTERVAL_PN945, -} fe_guard_interval_t; +}; +typedef enum fe_guard_interval fe_guard_interval_t; typedef enum fe_hierarchy { HIERARCHY_NONE, -- cgit v1.2.3 From 9df4fc5b8f34383d116a160809e782b4ca50a808 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Thu, 28 May 2015 22:06:56 -0300 Subject: [media] DocBook: improve documentation for hierarchy Format it as a table and links it with the legacy API xml. Signed-off-by: Mauro Carvalho Chehab --- Documentation/DocBook/media/dvb/dvbproperty.xml | 46 +++++++++++++++++----- Documentation/DocBook/media/dvb/frontend.xml | 18 --------- .../DocBook/media/dvb/frontend_legacy_api.xml | 2 +- include/uapi/linux/dvb/frontend.h | 6 ++- 4 files changed, 42 insertions(+), 30 deletions(-) (limited to 'include/uapi/linux') diff --git a/Documentation/DocBook/media/dvb/dvbproperty.xml b/Documentation/DocBook/media/dvb/dvbproperty.xml index 5f30a28a15b0..ae9bc1e089cc 100644 --- a/Documentation/DocBook/media/dvb/dvbproperty.xml +++ b/Documentation/DocBook/media/dvb/dvbproperty.xml @@ -951,15 +951,43 @@ typedef enum atscmh_sccc_code_mode {
<constant>DTV_HIERARCHY</constant> Frontend hierarchy - -typedef enum fe_hierarchy { - HIERARCHY_NONE, - HIERARCHY_1, - HIERARCHY_2, - HIERARCHY_4, - HIERARCHY_AUTO - } fe_hierarchy_t; - + + +
+Frontend hierarchy + + + enum fe_hierarchy + + &cs-def; + + + ID + Description + + + + + HIERARCHY_NONE + No hierarchy + + HIERARCHY_AUTO + Autodetect hierarchy (if supported) + + HIERARCHY_1 + Hierarchy 1 + + HIERARCHY_2 + Hierarchy 2 + + HIERARCHY_4 + Hierarchy 4 + + + +
+
+
<constant>DTV_STREAM_ID</constant> diff --git a/Documentation/DocBook/media/dvb/frontend.xml b/Documentation/DocBook/media/dvb/frontend.xml index a005c4b472f5..d81b3ff33295 100644 --- a/Documentation/DocBook/media/dvb/frontend.xml +++ b/Documentation/DocBook/media/dvb/frontend.xml @@ -56,24 +56,6 @@ specification is available at &sub-dvbproperty; -
-More OFDM parameters - -
-frontend hierarchy - -typedef enum fe_hierarchy { - HIERARCHY_NONE, - HIERARCHY_1, - HIERARCHY_2, - HIERARCHY_4, - HIERARCHY_AUTO - } fe_hierarchy_t; - -
- -
-
Frontend Function Calls diff --git a/Documentation/DocBook/media/dvb/frontend_legacy_api.xml b/Documentation/DocBook/media/dvb/frontend_legacy_api.xml index d20f1fd75fa9..cb2e18381305 100644 --- a/Documentation/DocBook/media/dvb/frontend_legacy_api.xml +++ b/Documentation/DocBook/media/dvb/frontend_legacy_api.xml @@ -183,7 +183,7 @@ struct dvb_vsb_parameters { &fe-modulation-t; constellation; /⋆ modulation type (see above) ⋆/ &fe-transmit-mode-t; transmission_mode; &fe-guard-interval-t; guard_interval; - fe_hierarchy_t hierarchy_information; + &fe-hierarchy-t; hierarchy_information; };
diff --git a/include/uapi/linux/dvb/frontend.h b/include/uapi/linux/dvb/frontend.h index 1d2b7c6dee04..3a7ff9002654 100644 --- a/include/uapi/linux/dvb/frontend.h +++ b/include/uapi/linux/dvb/frontend.h @@ -244,13 +244,15 @@ enum fe_guard_interval { typedef enum fe_guard_interval fe_guard_interval_t; -typedef enum fe_hierarchy { +enum fe_hierarchy { HIERARCHY_NONE, HIERARCHY_1, HIERARCHY_2, HIERARCHY_4, HIERARCHY_AUTO -} fe_hierarchy_t; +}; + +typedef enum fe_hierarchy fe_hierarchy_t; enum fe_interleaving { INTERLEAVING_NONE, -- cgit v1.2.3 From 2e5e435fb4fdcc64db49e903baddb1ea8827385e Mon Sep 17 00:00:00 2001 From: Ricardo Ribalda Date: Mon, 4 May 2015 05:07:30 -0300 Subject: [media] media/v4l2-core: Add support for V4L2_PIX_FMT_Y16_BE 16 bit greyscale format, structured in Big Endian. Such a format can be converted into a PMN image just by adding a header. Signed-off-by: Ricardo Ribalda Delgado Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- drivers/media/v4l2-core/v4l2-ioctl.c | 3 ++- include/uapi/linux/videodev2.h | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/drivers/media/v4l2-core/v4l2-ioctl.c b/drivers/media/v4l2-core/v4l2-ioctl.c index 14766029bf49..c5677e4030bb 100644 --- a/drivers/media/v4l2-core/v4l2-ioctl.c +++ b/drivers/media/v4l2-core/v4l2-ioctl.c @@ -1146,6 +1146,7 @@ static void v4l_fill_fmtdesc(struct v4l2_fmtdesc *fmt) case V4L2_PIX_FMT_Y10: descr = "10-bit Greyscale"; break; case V4L2_PIX_FMT_Y12: descr = "12-bit Greyscale"; break; case V4L2_PIX_FMT_Y16: descr = "16-bit Greyscale"; break; + case V4L2_PIX_FMT_Y16_BE: descr = "16-bit Greyscale BE"; break; case V4L2_PIX_FMT_Y10BPACK: descr = "10-bit Greyscale (Packed)"; break; case V4L2_PIX_FMT_PAL8: descr = "8-bit Palette"; break; case V4L2_PIX_FMT_UV8: descr = "8-bit Chrominance UV 4-4"; break; @@ -2539,7 +2540,7 @@ static long __video_do_ioctl(struct file *file, if (v4l2_is_known_ioctl(cmd)) { info = &v4l2_ioctls[_IOC_NR(cmd)]; - if (!test_bit(_IOC_NR(cmd), vfd->valid_ioctls) && + if (!test_bit(_IOC_NR(cmd), vfd->valid_ioctls) && !((info->flags & INFO_FL_CTRL) && vfh && vfh->ctrl_handler)) goto done; diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index 0f5a4673f3e4..bda496adb50b 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -404,6 +404,7 @@ struct v4l2_pix_format { #define V4L2_PIX_FMT_Y10 v4l2_fourcc('Y', '1', '0', ' ') /* 10 Greyscale */ #define V4L2_PIX_FMT_Y12 v4l2_fourcc('Y', '1', '2', ' ') /* 12 Greyscale */ #define V4L2_PIX_FMT_Y16 v4l2_fourcc('Y', '1', '6', ' ') /* 16 Greyscale */ +#define V4L2_PIX_FMT_Y16_BE v4l2_fourcc_be('Y', '1', '6', ' ') /* 16 Greyscale BE */ /* Grey bit-packed formats */ #define V4L2_PIX_FMT_Y10BPACK v4l2_fourcc('Y', '1', '0', 'B') /* 10 Greyscale bit-packed */ -- cgit v1.2.3 From e01dfc01914ab9a078ca8d08287c19c6663b5438 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Fri, 15 May 2015 09:29:05 -0300 Subject: [media] videodev2.h: add COLORSPACE_DEFAULT V4L2_COLORSPACE_DEFAULT is added so we have a specific define for the default case where applications do not set it but leave it to 0. In that case the driver will set the colorspace based on what it captures. This is already used, but we never had a define for the value 0. 
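(Illustration only, not part of the patch: a minimal capture-side sketch of the intended use, where the application leaves the colorspace field at V4L2_COLORSPACE_DEFAULT on VIDIOC_S_FMT and reads back whatever the driver selected. The device node, pixel format and frame size below are assumptions, not taken from the commit.)

/* Sketch: let the driver choose the colorspace and report its choice.
 * /dev/video0 is an assumed capture device node. */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/videodev2.h>

int main(void)
{
	struct v4l2_format fmt;
	int fd = open("/dev/video0", O_RDWR);

	if (fd < 0)
		return 1;

	memset(&fmt, 0, sizeof(fmt));
	fmt.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
	fmt.fmt.pix.width = 640;
	fmt.fmt.pix.height = 480;
	fmt.fmt.pix.pixelformat = V4L2_PIX_FMT_YUYV;
	fmt.fmt.pix.field = V4L2_FIELD_ANY;
	/* Explicitly leave the decision to the driver (same value as 0). */
	fmt.fmt.pix.colorspace = V4L2_COLORSPACE_DEFAULT;

	if (ioctl(fd, VIDIOC_S_FMT, &fmt) == 0)
		printf("driver selected colorspace %u\n",
		       fmt.fmt.pix.colorspace);

	return 0;
}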
Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/videodev2.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index bda496adb50b..c5e89ab21cd9 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -178,6 +178,12 @@ enum v4l2_memory { /* see also http://vektor.theorem.ca/graphics/ycbcr/ */ enum v4l2_colorspace { + /* + * Default colorspace, i.e. let the driver figure it out. + * Can only be used with video capture. + */ + V4L2_COLORSPACE_DEFAULT = 0, + /* SMPTE 170M: used for broadcast NTSC/PAL SDTV */ V4L2_COLORSPACE_SMPTE170M = 1, -- cgit v1.2.3 From addad1050827136e4f8d22c5c81df42f88f44651 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Fri, 15 May 2015 09:29:07 -0300 Subject: [media] videodev2.h: add COLORSPACE_RAW V4L2_COLORSPACE_RAW is added for raw image formats where the picture is minimally processed and is in the internal colorspace of the sensor. This is typically used in digital cameras where the image processing is done later. Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/videodev2.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index c5e89ab21cd9..81045aaabec9 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -226,6 +226,9 @@ enum v4l2_colorspace { /* BT.2020 colorspace, used for UHDTV. */ V4L2_COLORSPACE_BT2020 = 10, + + /* Raw colorspace: for RAW unprocessed images */ + V4L2_COLORSPACE_RAW = 11, }; enum v4l2_ycbcr_encoding { -- cgit v1.2.3 From 3818c4da43af9e67fed66174cc25f8fce4043d99 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Fri, 15 May 2015 09:29:09 -0300 Subject: [media] videodev2.h: add macros to map colorspace defaults The mapping of COLORSPACE_DEFAULT, YCBCR_ENC_DEFAULT or QUANTIZATION_DEFAULT to proper non-default values is fairly complex, and it is something that needs to be done both in the kernel and in userspace. So add macros that can do this conversion, making this available to both kernel and userspace. Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/videodev2.h | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index 81045aaabec9..003a91292a4f 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -231,6 +231,15 @@ enum v4l2_colorspace { V4L2_COLORSPACE_RAW = 11, }; +/* + * Determine how COLORSPACE_DEFAULT should map to a proper colorspace. + * This depends on whether this is a SDTV image (use SMPTE 170M), an + * HDTV image (use Rec. 709), or something else (use sRGB). + */ +#define V4L2_MAP_COLORSPACE_DEFAULT(is_sdtv, is_hdtv) \ + ((is_sdtv) ? V4L2_COLORSPACE_SMPTE170M : \ + ((is_hdtv) ? V4L2_COLORSPACE_REC709 : V4L2_COLORSPACE_SRGB)) + enum v4l2_ycbcr_encoding { /* * Mapping of V4L2_YCBCR_ENC_DEFAULT to actual encodings for the @@ -275,6 +284,16 @@ enum v4l2_ycbcr_encoding { V4L2_YCBCR_ENC_SMPTE240M = 8, }; +/* + * Determine how YCBCR_ENC_DEFAULT should map to a proper Y'CbCr encoding. + * This depends on the colorspace. + */ +#define V4L2_MAP_YCBCR_ENC_DEFAULT(colsp) \ + ((colsp) == V4L2_COLORSPACE_REC709 ? V4L2_YCBCR_ENC_709 : \ + ((colsp) == V4L2_COLORSPACE_BT2020 ? 
V4L2_YCBCR_ENC_BT2020 : \ + ((colsp) == V4L2_COLORSPACE_SMPTE240M ? V4L2_YCBCR_ENC_SMPTE240M : \ + V4L2_YCBCR_ENC_601))) + enum v4l2_quantization { /* * The default for R'G'B' quantization is always full range, except @@ -287,6 +306,17 @@ enum v4l2_quantization { V4L2_QUANTIZATION_LIM_RANGE = 2, }; +/* + * Determine how QUANTIZATION_DEFAULT should map to a proper quantization. + * This depends on whether the image is RGB or not, the colorspace and the + * Y'CbCr encoding. + */ +#define V4L2_MAP_QUANTIZATION_DEFAULT(is_rgb, colsp, ycbcr_enc) \ + (((is_rgb) && (colsp) == V4L2_COLORSPACE_BT2020) ? V4L2_QUANTIZATION_LIM_RANGE : \ + (((is_rgb) || (ycbcr_enc) == V4L2_YCBCR_ENC_XV601 || \ + (ycbcr_enc) == V4L2_YCBCR_ENC_XV709 || (colsp) == V4L2_COLORSPACE_JPEG) ? \ + V4L2_QUANTIZATION_FULL_RANGE : V4L2_QUANTIZATION_LIM_RANGE)) + enum v4l2_priority { V4L2_PRIORITY_UNSET = 0, /* not initialized */ V4L2_PRIORITY_BACKGROUND = 1, -- cgit v1.2.3 From 37e82c2f974b72c9ab49c787ef7b5bb1aec12768 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Wed, 27 May 2015 15:30:39 -0700 Subject: bpf: allow BPF programs access skb->skb_iif and skb->dev->ifindex fields classic BPF already exposes skb->dev->ifindex via SKF_AD_IFINDEX extension. Allow eBPF program to access it as well. Note that classic aborts execution of the program if 'skb->dev == NULL' (which is inconvenient for program writers), whereas eBPF returns zero in such case. Also expose the 'skb_iif' field, since programs triggered by redirected packet need to known the original interface index. Summary: __skb->ifindex -> skb->dev->ifindex __skb->ingress_ifindex -> skb->skb_iif Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 2 ++ net/core/filter.c | 18 ++++++++++++++++++ 2 files changed, 20 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index f0a9af8b4dae..72f3080afa1e 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -236,6 +236,8 @@ struct __sk_buff { __u32 vlan_tci; __u32 vlan_proto; __u32 priority; + __u32 ingress_ifindex; + __u32 ifindex; }; #endif /* _UAPI__LINUX_BPF_H__ */ diff --git a/net/core/filter.c b/net/core/filter.c index 3adcca6f17a4..2c30d6632d66 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1499,6 +1499,24 @@ static u32 sk_filter_convert_ctx_access(int dst_reg, int src_reg, int ctx_off, offsetof(struct sk_buff, priority)); break; + case offsetof(struct __sk_buff, ingress_ifindex): + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, skb_iif) != 4); + + *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg, + offsetof(struct sk_buff, skb_iif)); + break; + + case offsetof(struct __sk_buff, ifindex): + BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4); + + *insn++ = BPF_LDX_MEM(bytes_to_bpf_size(FIELD_SIZEOF(struct sk_buff, dev)), + dst_reg, src_reg, + offsetof(struct sk_buff, dev)); + *insn++ = BPF_JMP_IMM(BPF_JEQ, dst_reg, 0, 1); + *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, dst_reg, + offsetof(struct net_device, ifindex)); + break; + case offsetof(struct __sk_buff, mark): return convert_skb_access(SKF_AD_MARK, dst_reg, src_reg, insn); -- cgit v1.2.3 From 83caf9896a48de5f838227162c25de405c61ea9d Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Thu, 14 May 2015 14:40:02 +0200 Subject: i2c: add FUNC flag for slave capabilities So users can check in advance if there is slave support. 
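(Illustration only, not part of the patch: a minimal sketch of the advance check described above, using the existing I2C_FUNCS ioctl from i2c-dev to test the new I2C_FUNC_SLAVE bit. The adapter node /dev/i2c-0 is an assumption.)

/* Sketch: query adapter functionality and check for slave support. */
#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/i2c.h>
#include <linux/i2c-dev.h>

int main(void)
{
	unsigned long funcs = 0;
	int fd = open("/dev/i2c-0", O_RDWR);

	if (fd < 0)
		return 1;

	if (ioctl(fd, I2C_FUNCS, &funcs) == 0)
		printf("slave support: %s\n",
		       (funcs & I2C_FUNC_SLAVE) ? "yes" : "no");

	return 0;
}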
Signed-off-by: Wolfram Sang Acked-by: Geert Uytterhoeven Signed-off-by: Wolfram Sang --- include/uapi/linux/i2c.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/i2c.h b/include/uapi/linux/i2c.h index 0e949cbfd333..b0a7dd61eb35 100644 --- a/include/uapi/linux/i2c.h +++ b/include/uapi/linux/i2c.h @@ -87,6 +87,7 @@ struct i2c_msg { #define I2C_FUNC_PROTOCOL_MANGLING 0x00000004 /* I2C_M_IGNORE_NAK etc. */ #define I2C_FUNC_SMBUS_PEC 0x00000008 #define I2C_FUNC_NOSTART 0x00000010 /* I2C_M_NOSTART */ +#define I2C_FUNC_SLAVE 0x00000020 #define I2C_FUNC_SMBUS_BLOCK_PROC_CALL 0x00008000 /* SMBus 2.0 */ #define I2C_FUNC_SMBUS_QUICK 0x00010000 #define I2C_FUNC_SMBUS_READ_BYTE 0x00020000 -- cgit v1.2.3 From 1c4b1d73bacc546ba4e42f7eb4cb88c54139820b Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Wed, 27 May 2015 13:57:46 +0200 Subject: tty: move linux/gsmmux.h to uapi linux/gsmmux.h defines a user interface and therefore should be installed with other headers. Make the file include: * linux/if.h for IFNAMSIZ * linux/ioctl.h for _IO* macros Signed-off-by: Jiri Slaby Cc: Alan Cox Signed-off-by: Greg Kroah-Hartman --- include/linux/gsmmux.h | 36 ------------------------------------ include/uapi/linux/Kbuild | 1 + include/uapi/linux/gsmmux.h | 39 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 40 insertions(+), 36 deletions(-) delete mode 100644 include/linux/gsmmux.h create mode 100644 include/uapi/linux/gsmmux.h (limited to 'include/uapi/linux') diff --git a/include/linux/gsmmux.h b/include/linux/gsmmux.h deleted file mode 100644 index c25e9477f7c3..000000000000 --- a/include/linux/gsmmux.h +++ /dev/null @@ -1,36 +0,0 @@ -#ifndef _LINUX_GSMMUX_H -#define _LINUX_GSMMUX_H - -struct gsm_config -{ - unsigned int adaption; - unsigned int encapsulation; - unsigned int initiator; - unsigned int t1; - unsigned int t2; - unsigned int t3; - unsigned int n2; - unsigned int mru; - unsigned int mtu; - unsigned int k; - unsigned int i; - unsigned int unused[8]; /* Padding for expansion without - breaking stuff */ -}; - -#define GSMIOC_GETCONF _IOR('G', 0, struct gsm_config) -#define GSMIOC_SETCONF _IOW('G', 1, struct gsm_config) - -struct gsm_netconfig { - unsigned int adaption; /* Adaption to use in network mode */ - unsigned short protocol;/* Protocol to use - only ETH_P_IP supported */ - unsigned short unused2; - char if_name[IFNAMSIZ]; /* interface name format string */ - __u8 unused[28]; /* For future use */ -}; - -#define GSMIOC_ENABLE_NET _IOW('G', 2, struct gsm_netconfig) -#define GSMIOC_DISABLE_NET _IO('G', 3) - - -#endif diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild index 1a0006a76b00..1d3db6a74d1f 100644 --- a/include/uapi/linux/Kbuild +++ b/include/uapi/linux/Kbuild @@ -138,6 +138,7 @@ header-y += genetlink.h header-y += gen_stats.h header-y += gfs2_ondisk.h header-y += gigaset_dev.h +header-y += gsmmux.h header-y += hdlcdrv.h header-y += hdlc.h header-y += hdreg.h diff --git a/include/uapi/linux/gsmmux.h b/include/uapi/linux/gsmmux.h new file mode 100644 index 000000000000..c06742d52856 --- /dev/null +++ b/include/uapi/linux/gsmmux.h @@ -0,0 +1,39 @@ +#ifndef _LINUX_GSMMUX_H +#define _LINUX_GSMMUX_H + +#include +#include + +struct gsm_config +{ + unsigned int adaption; + unsigned int encapsulation; + unsigned int initiator; + unsigned int t1; + unsigned int t2; + unsigned int t3; + unsigned int n2; + unsigned int mru; + unsigned int mtu; + unsigned int k; + unsigned int i; + unsigned int unused[8]; /* Padding for 
expansion without + breaking stuff */ +}; + +#define GSMIOC_GETCONF _IOR('G', 0, struct gsm_config) +#define GSMIOC_SETCONF _IOW('G', 1, struct gsm_config) + +struct gsm_netconfig { + unsigned int adaption; /* Adaption to use in network mode */ + unsigned short protocol;/* Protocol to use - only ETH_P_IP supported */ + unsigned short unused2; + char if_name[IFNAMSIZ]; /* interface name format string */ + __u8 unused[28]; /* For future use */ +}; + +#define GSMIOC_ENABLE_NET _IOW('G', 2, struct gsm_netconfig) +#define GSMIOC_DISABLE_NET _IO('G', 3) + + +#endif -- cgit v1.2.3 From a28c257c9eb0bd76a4adcac97c07e34044ec71fb Mon Sep 17 00:00:00 2001 From: Sowmini Varadhan Date: Fri, 29 May 2015 17:28:07 -0400 Subject: net/rds: Declare SO_RDS_TRANSPORT and RDS_TRANS_* constants in uapi/linux/rds.h User space applications that desire to explicitly select the underlying transport for a PF_RDS socket may do so by using the SO_RDS_TRANSPORT socket option at the SOL_RDS level before bind(). The integer argument provided to the socket option would be one of the RDS_TRANS_* values, e.g., RDS_TRANS_TCP. This commit exports the constant values need by such applications via Signed-off-by: Sowmini Varadhan Signed-off-by: David S. Miller --- include/uapi/linux/rds.h | 10 ++++++++++ net/rds/rds.h | 5 ----- 2 files changed, 10 insertions(+), 5 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/rds.h b/include/uapi/linux/rds.h index 91950950aa59..0f9265cb2a96 100644 --- a/include/uapi/linux/rds.h +++ b/include/uapi/linux/rds.h @@ -38,6 +38,8 @@ #define RDS_IB_ABI_VERSION 0x301 +#define SOL_RDS 276 + /* * setsockopt/getsockopt for SOL_RDS */ @@ -48,6 +50,14 @@ #define RDS_RECVERR 5 #define RDS_CONG_MONITOR 6 #define RDS_GET_MR_FOR_DEST 7 +#define SO_RDS_TRANSPORT 8 + +/* supported values for SO_RDS_TRANSPORT */ +#define RDS_TRANS_IB 0 +#define RDS_TRANS_IWARP 1 +#define RDS_TRANS_TCP 2 +#define RDS_TRANS_COUNT 3 +#define RDS_TRANS_NONE (~0) /* * Control message types for SOL_RDS. diff --git a/net/rds/rds.h b/net/rds/rds.h index 0d41155a2258..76db508f73a1 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -408,11 +408,6 @@ struct rds_notifier { * should try hard not to block. */ -#define RDS_TRANS_IB 0 -#define RDS_TRANS_IWARP 1 -#define RDS_TRANS_TCP 2 -#define RDS_TRANS_COUNT 3 - struct rds_transport { char t_name[TRANSNAMSIZ]; struct list_head t_item; -- cgit v1.2.3 From e2b836cfb45d27a5efc5b1b65fe2442c53137d9c Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 22 Apr 2015 23:59:24 +0200 Subject: uapi/nfs: Add NFSv4.1 ACL definitions Add the ACL related protocol definitions which were added in the NFSv4.1 specification. (But we're not using them yet.) Signed-off-by: Andreas Gruenbacher Signed-off-by: J. 
Bruce Fields --- include/uapi/linux/nfs4.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nfs4.h b/include/uapi/linux/nfs4.h index adc0aff83fbb..2119c7c274d7 100644 --- a/include/uapi/linux/nfs4.h +++ b/include/uapi/linux/nfs4.h @@ -86,6 +86,10 @@ #define ACL4_SUPPORT_AUDIT_ACL 0x04 #define ACL4_SUPPORT_ALARM_ACL 0x08 +#define NFS4_ACL_AUTO_INHERIT 0x00000001 +#define NFS4_ACL_PROTECTED 0x00000002 +#define NFS4_ACL_DEFAULTED 0x00000004 + #define NFS4_ACE_FILE_INHERIT_ACE 0x00000001 #define NFS4_ACE_DIRECTORY_INHERIT_ACE 0x00000002 #define NFS4_ACE_NO_PROPAGATE_INHERIT_ACE 0x00000004 @@ -93,6 +97,7 @@ #define NFS4_ACE_SUCCESSFUL_ACCESS_ACE_FLAG 0x00000010 #define NFS4_ACE_FAILED_ACCESS_ACE_FLAG 0x00000020 #define NFS4_ACE_IDENTIFIER_GROUP 0x00000040 +#define NFS4_ACE_INHERITED_ACE 0x00000080 #define NFS4_ACE_READ_DATA 0x00000001 #define NFS4_ACE_LIST_DIRECTORY 0x00000001 @@ -106,6 +111,8 @@ #define NFS4_ACE_DELETE_CHILD 0x00000040 #define NFS4_ACE_READ_ATTRIBUTES 0x00000080 #define NFS4_ACE_WRITE_ATTRIBUTES 0x00000100 +#define NFS4_ACE_WRITE_RETENTION 0x00000200 +#define NFS4_ACE_WRITE_RETENTION_HOLD 0x00000400 #define NFS4_ACE_DELETE 0x00010000 #define NFS4_ACE_READ_ACL 0x00020000 #define NFS4_ACE_WRITE_ACL 0x00040000 -- cgit v1.2.3 From ccea74457bbdafe33dce8bffcb5cb183aeb5f2bb Mon Sep 17 00:00:00 2001 From: Neil McKee Date: Tue, 26 May 2015 20:59:43 -0700 Subject: openvswitch: include datapath actions with sampled-packet upcall to userspace If new optional attribute OVS_USERSPACE_ATTR_ACTIONS is added to an OVS_ACTION_ATTR_USERSPACE action, then include the datapath actions in the upcall. This Directly associates the sampled packet with the path it takes through the virtual switch. Path information currently includes mangling, encapsulation and decapsulation actions for tunneling protocols GRE, VXLAN, Geneve, MPLS and QinQ, but this extension requires no further changes to accommodate datapath actions that may be added in the future. Adding path information enhances visibility into complex virtual networks. Signed-off-by: Neil McKee Signed-off-by: David S. Miller --- include/uapi/linux/openvswitch.h | 4 ++++ net/openvswitch/actions.c | 23 +++++++++++++++-------- net/openvswitch/datapath.c | 18 ++++++++++++++++-- net/openvswitch/datapath.h | 2 ++ 4 files changed, 37 insertions(+), 10 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index bbd49a0c46c7..1dab77601c21 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -153,6 +153,8 @@ enum ovs_packet_cmd { * flow key against the kernel's. * @OVS_PACKET_ATTR_ACTIONS: Contains actions for the packet. Used * for %OVS_PACKET_CMD_EXECUTE. It has nested %OVS_ACTION_ATTR_* attributes. + * Also used in upcall when %OVS_ACTION_ATTR_USERSPACE has optional + * %OVS_USERSPACE_ATTR_ACTIONS attribute. * @OVS_PACKET_ATTR_USERDATA: Present for an %OVS_PACKET_CMD_ACTION * notification if the %OVS_ACTION_ATTR_USERSPACE action specified an * %OVS_USERSPACE_ATTR_USERDATA attribute, with the same length and content @@ -528,6 +530,7 @@ enum ovs_sample_attr { * copied to the %OVS_PACKET_CMD_ACTION message as %OVS_PACKET_ATTR_USERDATA. * @OVS_USERSPACE_ATTR_EGRESS_TUN_PORT: If present, u32 output port to get * tunnel info. + * @OVS_USERSPACE_ATTR_ACTIONS: If present, send actions with upcall. 
*/ enum ovs_userspace_attr { OVS_USERSPACE_ATTR_UNSPEC, @@ -535,6 +538,7 @@ enum ovs_userspace_attr { OVS_USERSPACE_ATTR_USERDATA, /* Optional user-specified cookie. */ OVS_USERSPACE_ATTR_EGRESS_TUN_PORT, /* Optional, u32 output port * to get tunnel info. */ + OVS_USERSPACE_ATTR_ACTIONS, /* Optional flag to get actions. */ __OVS_USERSPACE_ATTR_MAX }; diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index b491c1c296fe..8a8c0b8b4f63 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -608,17 +608,16 @@ static void do_output(struct datapath *dp, struct sk_buff *skb, int out_port) } static int output_userspace(struct datapath *dp, struct sk_buff *skb, - struct sw_flow_key *key, const struct nlattr *attr) + struct sw_flow_key *key, const struct nlattr *attr, + const struct nlattr *actions, int actions_len) { struct ovs_tunnel_info info; struct dp_upcall_info upcall; const struct nlattr *a; int rem; + memset(&upcall, 0, sizeof(upcall)); upcall.cmd = OVS_PACKET_CMD_ACTION; - upcall.userdata = NULL; - upcall.portid = 0; - upcall.egress_tun_info = NULL; for (a = nla_data(attr), rem = nla_len(attr); rem > 0; a = nla_next(a, &rem)) { @@ -647,6 +646,13 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb, break; } + case OVS_USERSPACE_ATTR_ACTIONS: { + /* Include actions. */ + upcall.actions = actions; + upcall.actions_len = actions_len; + break; + } + } /* End of switch. */ } @@ -654,7 +660,8 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb, } static int sample(struct datapath *dp, struct sk_buff *skb, - struct sw_flow_key *key, const struct nlattr *attr) + struct sw_flow_key *key, const struct nlattr *attr, + const struct nlattr *actions, int actions_len) { const struct nlattr *acts_list = NULL; const struct nlattr *a; @@ -688,7 +695,7 @@ static int sample(struct datapath *dp, struct sk_buff *skb, */ if (likely(nla_type(a) == OVS_ACTION_ATTR_USERSPACE && nla_is_last(a, rem))) - return output_userspace(dp, skb, key, a); + return output_userspace(dp, skb, key, a, actions, actions_len); skb = skb_clone(skb, GFP_ATOMIC); if (!skb) @@ -872,7 +879,7 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, break; case OVS_ACTION_ATTR_USERSPACE: - output_userspace(dp, skb, key, a); + output_userspace(dp, skb, key, a, attr, len); break; case OVS_ACTION_ATTR_HASH: @@ -916,7 +923,7 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, break; case OVS_ACTION_ATTR_SAMPLE: - err = sample(dp, skb, key, a); + err = sample(dp, skb, key, a, attr, len); break; } diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 3b90461317ec..ff8c4a4c1609 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -272,10 +272,9 @@ void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key) struct dp_upcall_info upcall; int error; + memset(&upcall, 0, sizeof(upcall)); upcall.cmd = OVS_PACKET_CMD_MISS; - upcall.userdata = NULL; upcall.portid = ovs_vport_find_upcall_portid(p, skb); - upcall.egress_tun_info = NULL; error = ovs_dp_upcall(dp, skb, key, &upcall); if (unlikely(error)) kfree_skb(skb); @@ -397,6 +396,10 @@ static size_t upcall_msg_size(const struct dp_upcall_info *upcall_info, if (upcall_info->egress_tun_info) size += nla_total_size(ovs_tun_key_attr_size()); + /* OVS_PACKET_ATTR_ACTIONS */ + if (upcall_info->actions_len) + size += nla_total_size(upcall_info->actions_len); + return size; } @@ -478,6 +481,17 @@ static int queue_userspace_packet(struct 
datapath *dp, struct sk_buff *skb, nla_nest_end(user_skb, nla); } + if (upcall_info->actions_len) { + nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_ACTIONS); + err = ovs_nla_put_actions(upcall_info->actions, + upcall_info->actions_len, + user_skb); + if (!err) + nla_nest_end(user_skb, nla); + else + nla_nest_cancel(user_skb, nla); + } + /* Only reserve room for attribute header, packet data is added * in skb_zerocopy() */ if (!(nla = nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, 0))) { diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h index 4ec4a480b147..cd691e935e08 100644 --- a/net/openvswitch/datapath.h +++ b/net/openvswitch/datapath.h @@ -116,6 +116,8 @@ struct ovs_skb_cb { struct dp_upcall_info { const struct ovs_tunnel_info *egress_tun_info; const struct nlattr *userdata; + const struct nlattr *actions; + int actions_len; u32 portid; u8 cmd; }; -- cgit v1.2.3 From 8760ce58353c2099be35ead62a572ee2d1e83b5b Mon Sep 17 00:00:00 2001 From: "John W. Linville" Date: Mon, 1 Jun 2015 15:51:34 -0400 Subject: geneve: allow user to specify TTL for tunnel frames Signed-off-by: John W. Linville Signed-off-by: David S. Miller --- drivers/net/geneve.c | 18 ++++++++++++++---- include/uapi/linux/if_link.h | 1 + 2 files changed, 15 insertions(+), 4 deletions(-) (limited to 'include/uapi/linux') diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index b7eafa4c1a67..1675dfdbfa70 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -44,7 +44,8 @@ struct geneve_dev { struct net *net; /* netns for packet i/o */ struct net_device *dev; /* netdev for geneve tunnel */ struct geneve_sock *sock; /* socket used for geneve tunnel */ - u8 vni[3]; /* virtual network ID for tunnel */ + u8 vni[3]; /* virtual network ID for tunnel */ + u8 ttl; /* TTL override */ struct sockaddr_in remote; /* IPv4 address for link partner */ struct list_head next; /* geneve's per namespace list */ }; @@ -184,7 +185,7 @@ static netdev_tx_t geneve_xmit(struct sk_buff *skb, struct net_device *dev) struct flowi4 fl4; int err; __be16 sport; - __u8 tos, ttl = 0; + __u8 tos, ttl; iip = ip_hdr(skb); @@ -207,11 +208,12 @@ static netdev_tx_t geneve_xmit(struct sk_buff *skb, struct net_device *dev) goto rt_tx_error; } - /* TODO: tos and ttl should be configurable */ + /* TODO: tos should be configurable */ tos = ip_tunnel_ecn_encap(0, iip, skb); - if (IN_MULTICAST(ntohl(fl4.daddr))) + ttl = geneve->ttl; + if (!ttl && IN_MULTICAST(ntohl(fl4.daddr))) ttl = 1; ttl = ttl ? 
: ip4_dst_hoplimit(&rt->dst); @@ -297,6 +299,7 @@ static void geneve_setup(struct net_device *dev) static const struct nla_policy geneve_policy[IFLA_GENEVE_MAX + 1] = { [IFLA_GENEVE_ID] = { .type = NLA_U32 }, [IFLA_GENEVE_REMOTE] = { .len = FIELD_SIZEOF(struct iphdr, daddr) }, + [IFLA_GENEVE_TTL] = { .type = NLA_U8 }, }; static int geneve_validate(struct nlattr *tb[], struct nlattr *data[]) @@ -364,6 +367,9 @@ static int geneve_newlink(struct net *net, struct net_device *dev, if (err) return err; + if (data[IFLA_GENEVE_TTL]) + geneve->ttl = nla_get_u8(data[IFLA_GENEVE_TTL]); + list_add(&geneve->next, &gn->geneve_list); hlist_add_head_rcu(&geneve->hlist, &gn->vni_list[hash]); @@ -386,6 +392,7 @@ static size_t geneve_get_size(const struct net_device *dev) { return nla_total_size(sizeof(__u32)) + /* IFLA_GENEVE_ID */ nla_total_size(sizeof(struct in_addr)) + /* IFLA_GENEVE_REMOTE */ + nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_TTL */ 0; } @@ -402,6 +409,9 @@ static int geneve_fill_info(struct sk_buff *skb, const struct net_device *dev) geneve->remote.sin_addr.s_addr)) goto nla_put_failure; + if (nla_put_u8(skb, IFLA_GENEVE_TTL, geneve->ttl)) + goto nla_put_failure; + return 0; nla_put_failure: diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index afccc9393fef..a834f31db915 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -395,6 +395,7 @@ enum { IFLA_GENEVE_UNSPEC, IFLA_GENEVE_ID, IFLA_GENEVE_REMOTE, + IFLA_GENEVE_TTL, __IFLA_GENEVE_MAX }; #define IFLA_GENEVE_MAX (__IFLA_GENEVE_MAX - 1) -- cgit v1.2.3 From d89511251f6519599b109dc6cda87a6ab314ed8c Mon Sep 17 00:00:00 2001 From: "John W. Linville" Date: Mon, 1 Jun 2015 15:51:35 -0400 Subject: geneve: allow user to specify TOS info for tunnel frames Signed-off-by: John W. Linville Signed-off-by: David S. 
Miller --- drivers/net/geneve.c | 18 ++++++++++++++---- include/uapi/linux/if_link.h | 1 + 2 files changed, 15 insertions(+), 4 deletions(-) (limited to 'include/uapi/linux') diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index 1675dfdbfa70..78d49d186e05 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -46,6 +46,7 @@ struct geneve_dev { struct geneve_sock *sock; /* socket used for geneve tunnel */ u8 vni[3]; /* virtual network ID for tunnel */ u8 ttl; /* TTL override */ + u8 tos; /* TOS override */ struct sockaddr_in remote; /* IPv4 address for link partner */ struct list_head next; /* geneve's per namespace list */ }; @@ -194,7 +195,12 @@ static netdev_tx_t geneve_xmit(struct sk_buff *skb, struct net_device *dev) /* TODO: port min/max limits should be configurable */ sport = udp_flow_src_port(dev_net(dev), skb, 0, 0, true); + tos = geneve->tos; + if (tos == 1) + tos = ip_tunnel_get_dsfield(iip, skb); + memset(&fl4, 0, sizeof(fl4)); + fl4.flowi4_tos = RT_TOS(tos); fl4.daddr = geneve->remote.sin_addr.s_addr; rt = ip_route_output_key(geneve->net, &fl4); if (IS_ERR(rt)) { @@ -208,9 +214,7 @@ static netdev_tx_t geneve_xmit(struct sk_buff *skb, struct net_device *dev) goto rt_tx_error; } - /* TODO: tos should be configurable */ - - tos = ip_tunnel_ecn_encap(0, iip, skb); + tos = ip_tunnel_ecn_encap(tos, iip, skb); ttl = geneve->ttl; if (!ttl && IN_MULTICAST(ntohl(fl4.daddr))) @@ -300,6 +304,7 @@ static const struct nla_policy geneve_policy[IFLA_GENEVE_MAX + 1] = { [IFLA_GENEVE_ID] = { .type = NLA_U32 }, [IFLA_GENEVE_REMOTE] = { .len = FIELD_SIZEOF(struct iphdr, daddr) }, [IFLA_GENEVE_TTL] = { .type = NLA_U8 }, + [IFLA_GENEVE_TOS] = { .type = NLA_U8 }, }; static int geneve_validate(struct nlattr *tb[], struct nlattr *data[]) @@ -370,6 +375,9 @@ static int geneve_newlink(struct net *net, struct net_device *dev, if (data[IFLA_GENEVE_TTL]) geneve->ttl = nla_get_u8(data[IFLA_GENEVE_TTL]); + if (data[IFLA_GENEVE_TOS]) + geneve->tos = nla_get_u8(data[IFLA_GENEVE_TOS]); + list_add(&geneve->next, &gn->geneve_list); hlist_add_head_rcu(&geneve->hlist, &gn->vni_list[hash]); @@ -393,6 +401,7 @@ static size_t geneve_get_size(const struct net_device *dev) return nla_total_size(sizeof(__u32)) + /* IFLA_GENEVE_ID */ nla_total_size(sizeof(struct in_addr)) + /* IFLA_GENEVE_REMOTE */ nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_TTL */ + nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_TOS */ 0; } @@ -409,7 +418,8 @@ static int geneve_fill_info(struct sk_buff *skb, const struct net_device *dev) geneve->remote.sin_addr.s_addr)) goto nla_put_failure; - if (nla_put_u8(skb, IFLA_GENEVE_TTL, geneve->ttl)) + if (nla_put_u8(skb, IFLA_GENEVE_TTL, geneve->ttl) || + nla_put_u8(skb, IFLA_GENEVE_TOS, geneve->tos)) goto nla_put_failure; return 0; diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index a834f31db915..1737b7a8272b 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -396,6 +396,7 @@ enum { IFLA_GENEVE_ID, IFLA_GENEVE_REMOTE, IFLA_GENEVE_TTL, + IFLA_GENEVE_TOS, __IFLA_GENEVE_MAX }; #define IFLA_GENEVE_MAX (__IFLA_GENEVE_MAX - 1) -- cgit v1.2.3 From aef11009c45ca594c18ecc822f101e3908ca3fb4 Mon Sep 17 00:00:00 2001 From: Yair Shachar Date: Sun, 7 Dec 2014 17:05:22 +0200 Subject: drm/amdkfd: add H/W debugger IOCTL set definitions This patch adds four new IOCTLs to amdkfd. These IOCTLs expose a H/W debugger functionality to the userspace. 
The IOCTLs are: - AMDKFD_IOC_DBG_REGISTER: The purpose of this IOCTL is to notify amdkfd that a process wants to use GPU debugging facilities on itself only. It is expected that this IOCTL would be called before any other H/W debugger requests are sent to amdkfd and for each GPU where the H/W debugging needs to be enabled. The use of this IOCTL ensures that only one instance of a debugger is active in the system. - AMDKFD_IOC_DBG_UNREGISTER: This IOCTL detaches the debugger/debugged process from the H/W Debug which was established by the AMDKFD_IOC_DBG_REGISTER IOCTL. - AMDKFD_IOC_DBG_ADDRESS_WATCH: This IOCTL allows to set different watchpoints with various conditions as indicated by the IOCTL's arguments. The available number of watchpoints is retrieved from topology. This operation is confined to the current debugged process, which was registered through AMDKFD_IOC_DBG_REGISTER. - AMDKFD_IOC_DBG_WAVE_CONTROL: This IOCTL allows to control a wavefront as indicated by the IOCTL's arguments. For example, you can halt/resume or kill either a single wavefront or a set of wavefronts. This operation is confined to the current debugged process, which was registered through AMDKFD_IOC_DBG_REGISTER. Because the arguments for the address watch IOCTL and wave control IOCTL are dynamic, meaning that they could vary in size, the userspace passes a pointer to a structure (in userspace) that contains the value of the arguments. The kernel driver is responsible to parse this structure and validate its contents. v2: change void* to uint64_t inside ioctl arguments Signed-off-by: Yair Shachar Signed-off-by: Oded Gabbay --- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 54 ++++++++++++++++++++++++++++++++ include/uapi/linux/kfd_ioctl.h | 43 +++++++++++++++++++++++-- 2 files changed, 95 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index b2c6109bd7af..b358e910378f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -432,6 +432,48 @@ out: return err; } +static int kfd_ioctl_dbg_register(struct file *filep, + struct kfd_process *p, void *data) +{ + long status = -EFAULT; + + return status; +} + +static int kfd_ioctl_dbg_unrgesiter(struct file *filep, + struct kfd_process *p, void *data) +{ + long status = -EFAULT; + + return status; +} + +/* + * Parse and generate variable size data structure for address watch. + * Total size of the buffer and # watch points is limited in order + * to prevent kernel abuse. (no bearing to the much smaller HW limitation + * which is enforced by dbgdev module) + * please also note that the watch address itself are not "copied from user", + * since it be set into the HW in user mode values. 
+ * + */ +static int kfd_ioctl_dbg_address_watch(struct file *filep, + struct kfd_process *p, void *data) +{ + long status = -EFAULT; + + return status; +} + +/* Parse and generate fixed size data structure for wave control */ +static int kfd_ioctl_dbg_wave_control(struct file *filep, + struct kfd_process *p, void *data) +{ + long status = -EFAULT; + + return status; +} + static int kfd_ioctl_get_clock_counters(struct file *filep, struct kfd_process *p, void *data) { @@ -612,6 +654,18 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = { AMDKFD_IOCTL_DEF(AMDKFD_IOC_WAIT_EVENTS, kfd_ioctl_wait_events, 0), + + AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_REGISTER, + kfd_ioctl_dbg_register, 0), + + AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_UNREGISTER, + kfd_ioctl_dbg_unrgesiter, 0), + + AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_ADDRESS_WATCH, + kfd_ioctl_dbg_address_watch, 0), + + AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_WAVE_CONTROL, + kfd_ioctl_dbg_wave_control, 0), }; #define AMDKFD_CORE_IOCTL_COUNT ARRAY_SIZE(amdkfd_ioctls) diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index 4ca35a8f9891..d6833426fdef 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -128,6 +128,32 @@ struct kfd_ioctl_get_process_apertures_args { uint32_t pad; }; +#define MAX_ALLOWED_NUM_POINTS 100 +#define MAX_ALLOWED_AW_BUFF_SIZE 4096 +#define MAX_ALLOWED_WAC_BUFF_SIZE 128 + +struct kfd_ioctl_dbg_register_args { + uint32_t gpu_id; /* to KFD */ + uint32_t pad; +}; + +struct kfd_ioctl_dbg_unregister_args { + uint32_t gpu_id; /* to KFD */ + uint32_t pad; +}; + +struct kfd_ioctl_dbg_address_watch_args { + uint64_t content_ptr; /* a pointer to the actual content */ + uint32_t gpu_id; /* to KFD */ + uint32_t buf_size_in_bytes; /*including gpu_id and buf_size */ +}; + +struct kfd_ioctl_dbg_wave_control_args { + uint64_t content_ptr; /* a pointer to the actual content */ + uint32_t gpu_id; /* to KFD */ + uint32_t buf_size_in_bytes; /*including gpu_id and buf_size */ +}; + /* Matching HSA_EVENTTYPE */ #define KFD_IOC_EVENT_SIGNAL 0 #define KFD_IOC_EVENT_NODECHANGE 1 @@ -198,7 +224,8 @@ struct kfd_event_data { }; struct kfd_ioctl_wait_events_args { - uint64_t events_ptr; /* to KFD */ + uint64_t events_ptr; /* pointed to struct + kfd_event_data array, to KFD */ uint32_t num_events; /* to KFD */ uint32_t wait_for_all; /* to KFD */ uint32_t timeout; /* to KFD */ @@ -247,7 +274,19 @@ struct kfd_ioctl_wait_events_args { #define AMDKFD_IOC_WAIT_EVENTS \ AMDKFD_IOWR(0x0C, struct kfd_ioctl_wait_events_args) +#define AMDKFD_IOC_DBG_REGISTER \ + AMDKFD_IOW(0x0D, struct kfd_ioctl_dbg_register_args) + +#define AMDKFD_IOC_DBG_UNREGISTER \ + AMDKFD_IOW(0x0E, struct kfd_ioctl_dbg_unregister_args) + +#define AMDKFD_IOC_DBG_ADDRESS_WATCH \ + AMDKFD_IOW(0x0F, struct kfd_ioctl_dbg_address_watch_args) + +#define AMDKFD_IOC_DBG_WAVE_CONTROL \ + AMDKFD_IOW(0x10, struct kfd_ioctl_dbg_wave_control_args) + #define AMDKFD_COMMAND_START 0x01 -#define AMDKFD_COMMAND_END 0x0D +#define AMDKFD_COMMAND_END 0x11 #endif -- cgit v1.2.3 From dc5698e80cf724770283e10414054662bdf6ccfa Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Mon, 9 Sep 2013 10:02:56 +1000 Subject: Add virtio gpu driver. This patch adds a kms driver for the virtio gpu. The xorg modesetting driver can handle the device just fine, the framebuffer for fbcon is there too. Qemu patches for the host side are under review currently. The pci version of the device comes in two variants: with and without vga compatibility. 
The former has a extra memory bar for the vga framebuffer, the later is a pure virtio device. The only concern for this driver is that in the virtio-vga case we have to kick out the firmware framebuffer. Initial revision has only 2d support, 3d (virgl) support requires some more work on the qemu side and will be added later. Signed-off-by: Dave Airlie Signed-off-by: Gerd Hoffmann Acked-by: Michael S. Tsirkin --- drivers/gpu/drm/Kconfig | 2 + drivers/gpu/drm/Makefile | 1 + drivers/gpu/drm/virtio/Kconfig | 14 + drivers/gpu/drm/virtio/Makefile | 11 + drivers/gpu/drm/virtio/virtgpu_debugfs.c | 64 ++++ drivers/gpu/drm/virtio/virtgpu_display.c | 473 ++++++++++++++++++++++++ drivers/gpu/drm/virtio/virtgpu_drm_bus.c | 91 +++++ drivers/gpu/drm/virtio/virtgpu_drv.c | 136 +++++++ drivers/gpu/drm/virtio/virtgpu_drv.h | 350 ++++++++++++++++++ drivers/gpu/drm/virtio/virtgpu_fb.c | 431 ++++++++++++++++++++++ drivers/gpu/drm/virtio/virtgpu_fence.c | 119 ++++++ drivers/gpu/drm/virtio/virtgpu_gem.c | 140 +++++++ drivers/gpu/drm/virtio/virtgpu_kms.c | 173 +++++++++ drivers/gpu/drm/virtio/virtgpu_object.c | 170 +++++++++ drivers/gpu/drm/virtio/virtgpu_plane.c | 120 ++++++ drivers/gpu/drm/virtio/virtgpu_ttm.c | 469 +++++++++++++++++++++++ drivers/gpu/drm/virtio/virtgpu_vq.c | 614 +++++++++++++++++++++++++++++++ include/drm/drmP.h | 1 + include/uapi/linux/Kbuild | 1 + include/uapi/linux/virtio_gpu.h | 204 ++++++++++ include/uapi/linux/virtio_ids.h | 1 + 21 files changed, 3585 insertions(+) create mode 100644 drivers/gpu/drm/virtio/Kconfig create mode 100644 drivers/gpu/drm/virtio/Makefile create mode 100644 drivers/gpu/drm/virtio/virtgpu_debugfs.c create mode 100644 drivers/gpu/drm/virtio/virtgpu_display.c create mode 100644 drivers/gpu/drm/virtio/virtgpu_drm_bus.c create mode 100644 drivers/gpu/drm/virtio/virtgpu_drv.c create mode 100644 drivers/gpu/drm/virtio/virtgpu_drv.h create mode 100644 drivers/gpu/drm/virtio/virtgpu_fb.c create mode 100644 drivers/gpu/drm/virtio/virtgpu_fence.c create mode 100644 drivers/gpu/drm/virtio/virtgpu_gem.c create mode 100644 drivers/gpu/drm/virtio/virtgpu_kms.c create mode 100644 drivers/gpu/drm/virtio/virtgpu_object.c create mode 100644 drivers/gpu/drm/virtio/virtgpu_plane.c create mode 100644 drivers/gpu/drm/virtio/virtgpu_ttm.c create mode 100644 drivers/gpu/drm/virtio/virtgpu_vq.c create mode 100644 include/uapi/linux/virtio_gpu.h (limited to 'include/uapi/linux') diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig index 47f2ce81b412..d4b65457122d 100644 --- a/drivers/gpu/drm/Kconfig +++ b/drivers/gpu/drm/Kconfig @@ -206,6 +206,8 @@ source "drivers/gpu/drm/qxl/Kconfig" source "drivers/gpu/drm/bochs/Kconfig" +source "drivers/gpu/drm/virtio/Kconfig" + source "drivers/gpu/drm/msm/Kconfig" source "drivers/gpu/drm/tegra/Kconfig" diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile index 4de8d9b006ec..8e75f8194bde 100644 --- a/drivers/gpu/drm/Makefile +++ b/drivers/gpu/drm/Makefile @@ -61,6 +61,7 @@ obj-$(CONFIG_DRM_OMAP) += omapdrm/ obj-$(CONFIG_DRM_TILCDC) += tilcdc/ obj-$(CONFIG_DRM_QXL) += qxl/ obj-$(CONFIG_DRM_BOCHS) += bochs/ +obj-$(CONFIG_DRM_VIRTIO_GPU) += virtio/ obj-$(CONFIG_DRM_MSM) += msm/ obj-$(CONFIG_DRM_TEGRA) += tegra/ obj-$(CONFIG_DRM_STI) += sti/ diff --git a/drivers/gpu/drm/virtio/Kconfig b/drivers/gpu/drm/virtio/Kconfig new file mode 100644 index 000000000000..9983eadb81b6 --- /dev/null +++ b/drivers/gpu/drm/virtio/Kconfig @@ -0,0 +1,14 @@ +config DRM_VIRTIO_GPU + tristate "Virtio GPU driver" + depends on DRM && VIRTIO + select 
FB_SYS_FILLRECT + select FB_SYS_COPYAREA + select FB_SYS_IMAGEBLIT + select DRM_KMS_HELPER + select DRM_KMS_FB_HELPER + select DRM_TTM + help + This is the virtual GPU driver for virtio. It can be used with + QEMU based VMMs (like KVM or Xen). + + If unsure say M. diff --git a/drivers/gpu/drm/virtio/Makefile b/drivers/gpu/drm/virtio/Makefile new file mode 100644 index 000000000000..2ee1602d77d4 --- /dev/null +++ b/drivers/gpu/drm/virtio/Makefile @@ -0,0 +1,11 @@ +# +# Makefile for the drm device driver. This driver provides support for the +# Direct Rendering Infrastructure (DRI) in XFree86 4.1.0 and higher. + +ccflags-y := -Iinclude/drm + +virtio-gpu-y := virtgpu_drv.o virtgpu_kms.o virtgpu_drm_bus.o virtgpu_gem.o \ + virtgpu_fb.o virtgpu_display.o virtgpu_vq.o virtgpu_ttm.o \ + virtgpu_fence.o virtgpu_object.o virtgpu_debugfs.o virtgpu_plane.o + +obj-$(CONFIG_DRM_VIRTIO_GPU) += virtio-gpu.o diff --git a/drivers/gpu/drm/virtio/virtgpu_debugfs.c b/drivers/gpu/drm/virtio/virtgpu_debugfs.c new file mode 100644 index 000000000000..db8b49101a8b --- /dev/null +++ b/drivers/gpu/drm/virtio/virtgpu_debugfs.c @@ -0,0 +1,64 @@ +/* + * Copyright (C) 2015 Red Hat, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#include + +#include "drmP.h" +#include "virtgpu_drv.h" + +static int +virtio_gpu_debugfs_irq_info(struct seq_file *m, void *data) +{ + struct drm_info_node *node = (struct drm_info_node *) m->private; + struct virtio_gpu_device *vgdev = node->minor->dev->dev_private; + + seq_printf(m, "fence %ld %lld\n", + atomic64_read(&vgdev->fence_drv.last_seq), + vgdev->fence_drv.sync_seq); + return 0; +} + +static struct drm_info_list virtio_gpu_debugfs_list[] = { + { "irq_fence", virtio_gpu_debugfs_irq_info, 0, NULL }, +}; + +#define VIRTIO_GPU_DEBUGFS_ENTRIES ARRAY_SIZE(virtio_gpu_debugfs_list) + +int +virtio_gpu_debugfs_init(struct drm_minor *minor) +{ + drm_debugfs_create_files(virtio_gpu_debugfs_list, + VIRTIO_GPU_DEBUGFS_ENTRIES, + minor->debugfs_root, minor); + return 0; +} + +void +virtio_gpu_debugfs_takedown(struct drm_minor *minor) +{ + drm_debugfs_remove_files(virtio_gpu_debugfs_list, + VIRTIO_GPU_DEBUGFS_ENTRIES, + minor); +} diff --git a/drivers/gpu/drm/virtio/virtgpu_display.c b/drivers/gpu/drm/virtio/virtgpu_display.c new file mode 100644 index 000000000000..4e160efc9402 --- /dev/null +++ b/drivers/gpu/drm/virtio/virtgpu_display.c @@ -0,0 +1,473 @@ +/* + * Copyright (C) 2015 Red Hat, Inc. + * All Rights Reserved. + * + * Authors: + * Dave Airlie + * Alon Levy + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#include "virtgpu_drv.h" +#include +#include + +#define XRES_MIN 320 +#define YRES_MIN 200 + +#define XRES_DEF 1024 +#define YRES_DEF 768 + +#define XRES_MAX 8192 +#define YRES_MAX 8192 + +static void virtio_gpu_crtc_gamma_set(struct drm_crtc *crtc, + u16 *red, u16 *green, u16 *blue, + uint32_t start, uint32_t size) +{ + /* TODO */ +} + +static void +virtio_gpu_hide_cursor(struct virtio_gpu_device *vgdev, + struct virtio_gpu_output *output) +{ + output->cursor.hdr.type = cpu_to_le32(VIRTIO_GPU_CMD_UPDATE_CURSOR); + output->cursor.resource_id = 0; + virtio_gpu_cursor_ping(vgdev, output); +} + +static int virtio_gpu_crtc_cursor_set(struct drm_crtc *crtc, + struct drm_file *file_priv, + uint32_t handle, + uint32_t width, + uint32_t height, + int32_t hot_x, int32_t hot_y) +{ + struct virtio_gpu_device *vgdev = crtc->dev->dev_private; + struct virtio_gpu_output *output = + container_of(crtc, struct virtio_gpu_output, crtc); + struct drm_gem_object *gobj = NULL; + struct virtio_gpu_object *qobj = NULL; + struct virtio_gpu_fence *fence = NULL; + int ret = 0; + + if (handle == 0) { + virtio_gpu_hide_cursor(vgdev, output); + return 0; + } + + /* lookup the cursor */ + gobj = drm_gem_object_lookup(crtc->dev, file_priv, handle); + if (gobj == NULL) + return -ENOENT; + + qobj = gem_to_virtio_gpu_obj(gobj); + + if (!qobj->hw_res_handle) { + ret = -EINVAL; + goto out; + } + + virtio_gpu_cmd_transfer_to_host_2d(vgdev, qobj->hw_res_handle, 0, + cpu_to_le32(64), + cpu_to_le32(64), + 0, 0, &fence); + + output->cursor.hdr.type = cpu_to_le32(VIRTIO_GPU_CMD_UPDATE_CURSOR); + output->cursor.resource_id = cpu_to_le32(qobj->hw_res_handle); + output->cursor.hot_x = cpu_to_le32(hot_x); + output->cursor.hot_y = cpu_to_le32(hot_y); + virtio_gpu_cursor_ping(vgdev, output); + ret = 0; + +out: + drm_gem_object_unreference_unlocked(gobj); + return ret; +} + +static int virtio_gpu_crtc_cursor_move(struct drm_crtc *crtc, + int x, int y) +{ + struct virtio_gpu_device *vgdev = crtc->dev->dev_private; + struct virtio_gpu_output *output = + container_of(crtc, struct virtio_gpu_output, crtc); + + output->cursor.hdr.type = cpu_to_le32(VIRTIO_GPU_CMD_MOVE_CURSOR); + output->cursor.pos.x = cpu_to_le32(x); + output->cursor.pos.y = cpu_to_le32(y); + virtio_gpu_cursor_ping(vgdev, output); + return 0; +} + +static const struct drm_crtc_funcs virtio_gpu_crtc_funcs = { + .cursor_set2 = virtio_gpu_crtc_cursor_set, + .cursor_move = virtio_gpu_crtc_cursor_move, + .gamma_set = virtio_gpu_crtc_gamma_set, + .set_config = drm_atomic_helper_set_config, + .destroy = drm_crtc_cleanup, + +#if 0 /* not (yet) working without vblank support according to docs */ + .page_flip = drm_atomic_helper_page_flip, +#endif + .reset = drm_atomic_helper_crtc_reset, + .atomic_duplicate_state = drm_atomic_helper_crtc_duplicate_state, + .atomic_destroy_state = drm_atomic_helper_crtc_destroy_state, +}; + +static void virtio_gpu_user_framebuffer_destroy(struct drm_framebuffer *fb) +{ + struct virtio_gpu_framebuffer *virtio_gpu_fb + = to_virtio_gpu_framebuffer(fb); + + if (virtio_gpu_fb->obj) + drm_gem_object_unreference_unlocked(virtio_gpu_fb->obj); + drm_framebuffer_cleanup(fb); + kfree(virtio_gpu_fb); +} + +static int +virtio_gpu_framebuffer_surface_dirty(struct drm_framebuffer *fb, + struct drm_file *file_priv, + unsigned flags, unsigned color, + struct drm_clip_rect *clips, + unsigned num_clips) +{ + struct virtio_gpu_framebuffer *virtio_gpu_fb + = to_virtio_gpu_framebuffer(fb); + + return virtio_gpu_surface_dirty(virtio_gpu_fb, clips, num_clips); +} + 
+static const struct drm_framebuffer_funcs virtio_gpu_fb_funcs = { + .destroy = virtio_gpu_user_framebuffer_destroy, + .dirty = virtio_gpu_framebuffer_surface_dirty, +}; + +int +virtio_gpu_framebuffer_init(struct drm_device *dev, + struct virtio_gpu_framebuffer *vgfb, + struct drm_mode_fb_cmd2 *mode_cmd, + struct drm_gem_object *obj) +{ + int ret; + struct virtio_gpu_object *bo; + vgfb->obj = obj; + + bo = gem_to_virtio_gpu_obj(obj); + + ret = drm_framebuffer_init(dev, &vgfb->base, &virtio_gpu_fb_funcs); + if (ret) { + vgfb->obj = NULL; + return ret; + } + drm_helper_mode_fill_fb_struct(&vgfb->base, mode_cmd); + + spin_lock_init(&vgfb->dirty_lock); + vgfb->x1 = vgfb->y1 = INT_MAX; + vgfb->x2 = vgfb->y2 = 0; + return 0; +} + +static bool virtio_gpu_crtc_mode_fixup(struct drm_crtc *crtc, + const struct drm_display_mode *mode, + struct drm_display_mode *adjusted_mode) +{ + return true; +} + +static void virtio_gpu_crtc_mode_set_nofb(struct drm_crtc *crtc) +{ + struct drm_device *dev = crtc->dev; + struct virtio_gpu_device *vgdev = dev->dev_private; + struct virtio_gpu_output *output = drm_crtc_to_virtio_gpu_output(crtc); + + virtio_gpu_cmd_set_scanout(vgdev, output->index, 0, + crtc->mode.hdisplay, + crtc->mode.vdisplay, 0, 0); +} + +static void virtio_gpu_crtc_enable(struct drm_crtc *crtc) +{ +} + +static void virtio_gpu_crtc_disable(struct drm_crtc *crtc) +{ + struct drm_device *dev = crtc->dev; + struct virtio_gpu_device *vgdev = dev->dev_private; + struct virtio_gpu_output *output = drm_crtc_to_virtio_gpu_output(crtc); + + virtio_gpu_cmd_set_scanout(vgdev, output->index, 0, 0, 0, 0, 0); +} + +static int virtio_gpu_crtc_atomic_check(struct drm_crtc *crtc, + struct drm_crtc_state *state) +{ + return 0; +} + +static const struct drm_crtc_helper_funcs virtio_gpu_crtc_helper_funcs = { + .enable = virtio_gpu_crtc_enable, + .disable = virtio_gpu_crtc_disable, + .mode_fixup = virtio_gpu_crtc_mode_fixup, + .mode_set_nofb = virtio_gpu_crtc_mode_set_nofb, + .atomic_check = virtio_gpu_crtc_atomic_check, +}; + +static bool virtio_gpu_enc_mode_fixup(struct drm_encoder *encoder, + const struct drm_display_mode *mode, + struct drm_display_mode *adjusted_mode) +{ + return true; +} + +static void virtio_gpu_enc_mode_set(struct drm_encoder *encoder, + struct drm_display_mode *mode, + struct drm_display_mode *adjusted_mode) +{ +} + +static void virtio_gpu_enc_enable(struct drm_encoder *encoder) +{ +} + +static void virtio_gpu_enc_disable(struct drm_encoder *encoder) +{ +} + +static int virtio_gpu_conn_get_modes(struct drm_connector *connector) +{ + struct virtio_gpu_output *output = + drm_connector_to_virtio_gpu_output(connector); + struct drm_display_mode *mode = NULL; + int count, width, height; + + width = le32_to_cpu(output->info.r.width); + height = le32_to_cpu(output->info.r.height); + count = drm_add_modes_noedid(connector, XRES_MAX, YRES_MAX); + + if (width == 0 || height == 0) { + width = XRES_DEF; + height = YRES_DEF; + drm_set_preferred_mode(connector, XRES_DEF, YRES_DEF); + } else { + DRM_DEBUG("add mode: %dx%d\n", width, height); + mode = drm_cvt_mode(connector->dev, width, height, 60, + false, false, false); + mode->type |= DRM_MODE_TYPE_PREFERRED; + drm_mode_probed_add(connector, mode); + count++; + } + + return count; +} + +static int virtio_gpu_conn_mode_valid(struct drm_connector *connector, + struct drm_display_mode *mode) +{ + struct virtio_gpu_output *output = + drm_connector_to_virtio_gpu_output(connector); + int width, height; + + width = le32_to_cpu(output->info.r.width); + height = 
le32_to_cpu(output->info.r.height); + + if (!(mode->type & DRM_MODE_TYPE_PREFERRED)) + return MODE_OK; + if (mode->hdisplay == XRES_DEF && mode->vdisplay == YRES_DEF) + return MODE_OK; + if (mode->hdisplay <= width && mode->hdisplay >= width - 16 && + mode->vdisplay <= height && mode->vdisplay >= height - 16) + return MODE_OK; + + DRM_DEBUG("del mode: %dx%d\n", mode->hdisplay, mode->vdisplay); + return MODE_BAD; +} + +static struct drm_encoder* +virtio_gpu_best_encoder(struct drm_connector *connector) +{ + struct virtio_gpu_output *virtio_gpu_output = + drm_connector_to_virtio_gpu_output(connector); + + return &virtio_gpu_output->enc; +} + +static const struct drm_encoder_helper_funcs virtio_gpu_enc_helper_funcs = { + .mode_fixup = virtio_gpu_enc_mode_fixup, + .mode_set = virtio_gpu_enc_mode_set, + .enable = virtio_gpu_enc_enable, + .disable = virtio_gpu_enc_disable, +}; + +static const struct drm_connector_helper_funcs virtio_gpu_conn_helper_funcs = { + .get_modes = virtio_gpu_conn_get_modes, + .mode_valid = virtio_gpu_conn_mode_valid, + .best_encoder = virtio_gpu_best_encoder, +}; + +static void virtio_gpu_conn_save(struct drm_connector *connector) +{ + DRM_DEBUG("\n"); +} + +static void virtio_gpu_conn_restore(struct drm_connector *connector) +{ + DRM_DEBUG("\n"); +} + +static enum drm_connector_status virtio_gpu_conn_detect( + struct drm_connector *connector, + bool force) +{ + struct virtio_gpu_output *output = + drm_connector_to_virtio_gpu_output(connector); + + if (output->info.enabled) + return connector_status_connected; + else + return connector_status_disconnected; +} + +static void virtio_gpu_conn_destroy(struct drm_connector *connector) +{ + struct virtio_gpu_output *virtio_gpu_output = + drm_connector_to_virtio_gpu_output(connector); + + drm_connector_unregister(connector); + drm_connector_cleanup(connector); + kfree(virtio_gpu_output); +} + +static const struct drm_connector_funcs virtio_gpu_connector_funcs = { + .dpms = drm_atomic_helper_connector_dpms, + .save = virtio_gpu_conn_save, + .restore = virtio_gpu_conn_restore, + .detect = virtio_gpu_conn_detect, + .fill_modes = drm_helper_probe_single_connector_modes, + .destroy = virtio_gpu_conn_destroy, + .reset = drm_atomic_helper_connector_reset, + .atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state, + .atomic_destroy_state = drm_atomic_helper_connector_destroy_state, +}; + +static const struct drm_encoder_funcs virtio_gpu_enc_funcs = { + .destroy = drm_encoder_cleanup, +}; + +static int vgdev_output_init(struct virtio_gpu_device *vgdev, int index) +{ + struct drm_device *dev = vgdev->ddev; + struct virtio_gpu_output *output = vgdev->outputs + index; + struct drm_connector *connector = &output->conn; + struct drm_encoder *encoder = &output->enc; + struct drm_crtc *crtc = &output->crtc; + struct drm_plane *plane; + + output->index = index; + if (index == 0) { + output->info.enabled = cpu_to_le32(true); + output->info.r.width = cpu_to_le32(XRES_DEF); + output->info.r.height = cpu_to_le32(YRES_DEF); + } + + plane = virtio_gpu_plane_init(vgdev, index); + if (IS_ERR(plane)) + return PTR_ERR(plane); + drm_crtc_init_with_planes(dev, crtc, plane, NULL, + &virtio_gpu_crtc_funcs); + drm_mode_crtc_set_gamma_size(crtc, 256); + drm_crtc_helper_add(crtc, &virtio_gpu_crtc_helper_funcs); + plane->crtc = crtc; + + drm_connector_init(dev, connector, &virtio_gpu_connector_funcs, + DRM_MODE_CONNECTOR_VIRTUAL); + drm_connector_helper_add(connector, &virtio_gpu_conn_helper_funcs); + + drm_encoder_init(dev, encoder, 
&virtio_gpu_enc_funcs, + DRM_MODE_ENCODER_VIRTUAL); + drm_encoder_helper_add(encoder, &virtio_gpu_enc_helper_funcs); + encoder->possible_crtcs = 1 << index; + + drm_mode_connector_attach_encoder(connector, encoder); + drm_connector_register(connector); + return 0; +} + +static struct drm_framebuffer * +virtio_gpu_user_framebuffer_create(struct drm_device *dev, + struct drm_file *file_priv, + struct drm_mode_fb_cmd2 *mode_cmd) +{ + struct drm_gem_object *obj = NULL; + struct virtio_gpu_framebuffer *virtio_gpu_fb; + int ret; + + /* lookup object associated with res handle */ + obj = drm_gem_object_lookup(dev, file_priv, mode_cmd->handles[0]); + if (!obj) + return ERR_PTR(-EINVAL); + + virtio_gpu_fb = kzalloc(sizeof(*virtio_gpu_fb), GFP_KERNEL); + if (virtio_gpu_fb == NULL) + return ERR_PTR(-ENOMEM); + + ret = virtio_gpu_framebuffer_init(dev, virtio_gpu_fb, mode_cmd, obj); + if (ret) { + kfree(virtio_gpu_fb); + if (obj) + drm_gem_object_unreference_unlocked(obj); + return NULL; + } + + return &virtio_gpu_fb->base; +} + +static const struct drm_mode_config_funcs virtio_gpu_mode_funcs = { + .fb_create = virtio_gpu_user_framebuffer_create, + .atomic_check = drm_atomic_helper_check, + .atomic_commit = drm_atomic_helper_commit, +}; + +int virtio_gpu_modeset_init(struct virtio_gpu_device *vgdev) +{ + int i; + + drm_mode_config_init(vgdev->ddev); + vgdev->ddev->mode_config.funcs = (void *)&virtio_gpu_mode_funcs; + + /* modes will be validated against the framebuffer size */ + vgdev->ddev->mode_config.min_width = XRES_MIN; + vgdev->ddev->mode_config.min_height = YRES_MIN; + vgdev->ddev->mode_config.max_width = XRES_MAX; + vgdev->ddev->mode_config.max_height = YRES_MAX; + + for (i = 0 ; i < vgdev->num_scanouts; ++i) + vgdev_output_init(vgdev, i); + + drm_mode_config_reset(vgdev->ddev); + return 0; +} + +void virtio_gpu_modeset_fini(struct virtio_gpu_device *vgdev) +{ + virtio_gpu_fbdev_fini(vgdev); + drm_mode_config_cleanup(vgdev->ddev); +} diff --git a/drivers/gpu/drm/virtio/virtgpu_drm_bus.c b/drivers/gpu/drm/virtio/virtgpu_drm_bus.c new file mode 100644 index 000000000000..f4ec816e9468 --- /dev/null +++ b/drivers/gpu/drm/virtio/virtgpu_drm_bus.c @@ -0,0 +1,91 @@ +/* + * Copyright (C) 2015 Red Hat, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#include + +#include "virtgpu_drv.h" + +int drm_virtio_set_busid(struct drm_device *dev, struct drm_master *master) +{ + struct pci_dev *pdev = dev->pdev; + + if (pdev) { + return drm_pci_set_busid(dev, master); + } + return 0; +} + +int drm_virtio_init(struct drm_driver *driver, struct virtio_device *vdev) +{ + struct drm_device *dev; + int ret; + + dev = drm_dev_alloc(driver, &vdev->dev); + if (!dev) + return -ENOMEM; + dev->virtdev = vdev; + vdev->priv = dev; + + if (strcmp(vdev->dev.parent->bus->name, "pci") == 0) { + struct pci_dev *pdev = to_pci_dev(vdev->dev.parent); + bool vga = (pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA; + + if (vga) { + /* + * Need to make sure we don't have two drivers + * for the same hardware here. Some day we + * will simply kick out the firmware + * (vesa/efi) framebuffer. + * + * Virtual hardware specs for virtio-vga are + * not finalized yet, therefore we can't add + * code for that yet. + * + * So ignore the device for the time being, + * and suggest to the user use the device + * variant without vga compatibility mode. + */ + DRM_ERROR("virtio-vga not (yet) supported\n"); + DRM_ERROR("please use virtio-gpu-pci instead\n"); + ret = -ENODEV; + goto err_free; + } + dev->pdev = pdev; + } + + ret = drm_dev_register(dev, 0); + if (ret) + goto err_free; + + DRM_INFO("Initialized %s %d.%d.%d %s on minor %d\n", driver->name, + driver->major, driver->minor, driver->patchlevel, + driver->date, dev->primary->index); + + return 0; + +err_free: + drm_dev_unref(dev); + return ret; +} diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.c b/drivers/gpu/drm/virtio/virtgpu_drv.c new file mode 100644 index 000000000000..7d9610aaeff9 --- /dev/null +++ b/drivers/gpu/drm/virtio/virtgpu_drv.c @@ -0,0 +1,136 @@ +/* + * Copyright (C) 2015 Red Hat, Inc. + * All Rights Reserved. + * + * Authors: + * Dave Airlie + * Gerd Hoffmann + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#include +#include +#include +#include "drmP.h" +#include "drm/drm.h" + +#include "virtgpu_drv.h" +static struct drm_driver driver; + +static int virtio_gpu_modeset = -1; + +MODULE_PARM_DESC(modeset, "Disable/Enable modesetting"); +module_param_named(modeset, virtio_gpu_modeset, int, 0400); + +static int virtio_gpu_probe(struct virtio_device *vdev) +{ +#ifdef CONFIG_VGA_CONSOLE + if (vgacon_text_force() && virtio_gpu_modeset == -1) + return -EINVAL; +#endif + + if (virtio_gpu_modeset == 0) + return -EINVAL; + + return drm_virtio_init(&driver, vdev); +} + +static void virtio_gpu_remove(struct virtio_device *vdev) +{ + struct drm_device *dev = vdev->priv; + drm_put_dev(dev); +} + +static void virtio_gpu_config_changed(struct virtio_device *vdev) +{ + struct drm_device *dev = vdev->priv; + struct virtio_gpu_device *vgdev = dev->dev_private; + + schedule_work(&vgdev->config_changed_work); +} + +static struct virtio_device_id id_table[] = { + { VIRTIO_ID_GPU, VIRTIO_DEV_ANY_ID }, + { 0 }, +}; + +static unsigned int features[] = { +}; +static struct virtio_driver virtio_gpu_driver = { + .feature_table = features, + .feature_table_size = ARRAY_SIZE(features), + .driver.name = KBUILD_MODNAME, + .driver.owner = THIS_MODULE, + .id_table = id_table, + .probe = virtio_gpu_probe, + .remove = virtio_gpu_remove, + .config_changed = virtio_gpu_config_changed +}; + +module_virtio_driver(virtio_gpu_driver); + +MODULE_DEVICE_TABLE(virtio, id_table); +MODULE_DESCRIPTION("Virtio GPU driver"); +MODULE_LICENSE("GPL and additional rights"); +MODULE_AUTHOR("Dave Airlie "); +MODULE_AUTHOR("Gerd Hoffmann "); +MODULE_AUTHOR("Alon Levy"); + +static const struct file_operations virtio_gpu_driver_fops = { + .owner = THIS_MODULE, + .open = drm_open, + .mmap = virtio_gpu_mmap, + .poll = drm_poll, + .read = drm_read, + .unlocked_ioctl = drm_ioctl, + .release = drm_release, +#ifdef CONFIG_COMPAT + .compat_ioctl = drm_compat_ioctl, +#endif + .llseek = noop_llseek, +}; + + +static struct drm_driver driver = { + .driver_features = DRIVER_MODESET | DRIVER_GEM, + .set_busid = drm_virtio_set_busid, + .load = virtio_gpu_driver_load, + .unload = virtio_gpu_driver_unload, + + .dumb_create = virtio_gpu_mode_dumb_create, + .dumb_map_offset = virtio_gpu_mode_dumb_mmap, + .dumb_destroy = virtio_gpu_mode_dumb_destroy, + +#if defined(CONFIG_DEBUG_FS) + .debugfs_init = virtio_gpu_debugfs_init, + .debugfs_cleanup = virtio_gpu_debugfs_takedown, +#endif + + .gem_free_object = virtio_gpu_gem_free_object, + .fops = &virtio_gpu_driver_fops, + + .name = DRIVER_NAME, + .desc = DRIVER_DESC, + .date = DRIVER_DATE, + .major = DRIVER_MAJOR, + .minor = DRIVER_MINOR, + .patchlevel = DRIVER_PATCHLEVEL, +}; diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.h b/drivers/gpu/drm/virtio/virtgpu_drv.h new file mode 100644 index 000000000000..e5a2c092460b --- /dev/null +++ b/drivers/gpu/drm/virtio/virtgpu_drv.h @@ -0,0 +1,350 @@ +/* + * Copyright (C) 2015 Red Hat, Inc. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef VIRTIO_DRV_H +#define VIRTIO_DRV_H + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#define DRIVER_NAME "virtio_gpu" +#define DRIVER_DESC "virtio GPU" +#define DRIVER_DATE "0" + +#define DRIVER_MAJOR 0 +#define DRIVER_MINOR 0 +#define DRIVER_PATCHLEVEL 1 + +/* virtgpu_drm_bus.c */ +int drm_virtio_set_busid(struct drm_device *dev, struct drm_master *master); +int drm_virtio_init(struct drm_driver *driver, struct virtio_device *vdev); + +struct virtio_gpu_object { + struct drm_gem_object gem_base; + uint32_t hw_res_handle; + + struct sg_table *pages; + void *vmap; + bool dumb; + struct ttm_place placement_code; + struct ttm_placement placement; + struct ttm_buffer_object tbo; + struct ttm_bo_kmap_obj kmap; +}; +#define gem_to_virtio_gpu_obj(gobj) \ + container_of((gobj), struct virtio_gpu_object, gem_base) + +struct virtio_gpu_vbuffer; +struct virtio_gpu_device; + +typedef void (*virtio_gpu_resp_cb)(struct virtio_gpu_device *vgdev, + struct virtio_gpu_vbuffer *vbuf); + +struct virtio_gpu_fence_driver { + atomic64_t last_seq; + uint64_t sync_seq; + struct list_head fences; + spinlock_t lock; +}; + +struct virtio_gpu_fence { + struct fence f; + struct virtio_gpu_fence_driver *drv; + struct list_head node; + uint64_t seq; +}; +#define to_virtio_fence(x) \ + container_of(x, struct virtio_gpu_fence, f) + +struct virtio_gpu_vbuffer { + char *buf; + int size; + + void *data_buf; + uint32_t data_size; + + char *resp_buf; + int resp_size; + + virtio_gpu_resp_cb resp_cb; + + struct list_head list; +}; + +struct virtio_gpu_output { + int index; + struct drm_crtc crtc; + struct drm_connector conn; + struct drm_encoder enc; + struct virtio_gpu_display_one info; + struct virtio_gpu_update_cursor cursor; + int cur_x; + int cur_y; +}; +#define drm_crtc_to_virtio_gpu_output(x) \ + container_of(x, struct virtio_gpu_output, crtc) +#define drm_connector_to_virtio_gpu_output(x) \ + container_of(x, struct virtio_gpu_output, conn) +#define drm_encoder_to_virtio_gpu_output(x) \ + container_of(x, struct virtio_gpu_output, enc) + +struct virtio_gpu_framebuffer { + struct drm_framebuffer base; + struct drm_gem_object *obj; + int x1, y1, x2, y2; /* dirty rect */ + spinlock_t dirty_lock; + uint32_t hw_res_handle; +}; +#define to_virtio_gpu_framebuffer(x) \ + container_of(x, struct virtio_gpu_framebuffer, base) + 
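The drm_crtc_to_virtio_gpu_output() and to_virtio_gpu_framebuffer() helpers above only work because the DRM core objects (crtc, connector, encoder, framebuffer base) are embedded by value in the driver structures, so container_of() can step back from a pointer to the member to the enclosing wrapper with plain pointer arithmetic and no lookup table. A stand-alone sketch of the same pattern, with invented names and userspace offsetof() standing in for the kernel's container_of():

#include <stddef.h>
#include <stdio.h>

struct core_obj { int id; };		/* plays the role of struct drm_crtc   */

struct wrapper_obj {			/* plays the role of virtio_gpu_output */
	int private_state;
	struct core_obj core;		/* embedded by value, not a pointer    */
};

#define core_to_wrapper(ptr) \
	((struct wrapper_obj *)((char *)(ptr) - offsetof(struct wrapper_obj, core)))

int main(void)
{
	struct wrapper_obj w = { .private_state = 42 };
	struct core_obj *seen_by_core = &w.core;	/* what a core callback receives */

	/* Recover the enclosing wrapper, as drm_crtc_to_virtio_gpu_output() does. */
	printf("%d\n", core_to_wrapper(seen_by_core)->private_state);
	return 0;
}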
+struct virtio_gpu_mman { + struct ttm_bo_global_ref bo_global_ref; + struct drm_global_reference mem_global_ref; + bool mem_global_referenced; + struct ttm_bo_device bdev; +}; + +struct virtio_gpu_fbdev; + +struct virtio_gpu_queue { + struct virtqueue *vq; + spinlock_t qlock; + wait_queue_head_t ack_queue; + struct work_struct dequeue_work; +}; + +struct virtio_gpu_device { + struct device *dev; + struct drm_device *ddev; + + struct virtio_device *vdev; + + struct virtio_gpu_mman mman; + + /* pointer to fbdev info structure */ + struct virtio_gpu_fbdev *vgfbdev; + struct virtio_gpu_output outputs[VIRTIO_GPU_MAX_SCANOUTS]; + uint32_t num_scanouts; + + struct virtio_gpu_queue ctrlq; + struct virtio_gpu_queue cursorq; + struct list_head free_vbufs; + void *vbufs; + bool vqs_ready; + + struct idr resource_idr; + spinlock_t resource_idr_lock; + + wait_queue_head_t resp_wq; + /* current display info */ + spinlock_t display_info_lock; + + struct virtio_gpu_fence_driver fence_drv; + + struct idr ctx_id_idr; + spinlock_t ctx_id_idr_lock; + + struct work_struct config_changed_work; +}; + +struct virtio_gpu_fpriv { + uint32_t ctx_id; +}; + +/* virtio_ioctl.c */ +#define DRM_VIRTIO_NUM_IOCTLS 10 +extern struct drm_ioctl_desc virtio_gpu_ioctls[DRM_VIRTIO_NUM_IOCTLS]; + +/* virtio_kms.c */ +int virtio_gpu_driver_load(struct drm_device *dev, unsigned long flags); +int virtio_gpu_driver_unload(struct drm_device *dev); + +/* virtio_gem.c */ +void virtio_gpu_gem_free_object(struct drm_gem_object *gem_obj); +int virtio_gpu_gem_init(struct virtio_gpu_device *vgdev); +void virtio_gpu_gem_fini(struct virtio_gpu_device *vgdev); +int virtio_gpu_gem_create(struct drm_file *file, + struct drm_device *dev, + uint64_t size, + struct drm_gem_object **obj_p, + uint32_t *handle_p); +struct virtio_gpu_object *virtio_gpu_alloc_object(struct drm_device *dev, + size_t size, bool kernel, + bool pinned); +int virtio_gpu_mode_dumb_create(struct drm_file *file_priv, + struct drm_device *dev, + struct drm_mode_create_dumb *args); +int virtio_gpu_mode_dumb_destroy(struct drm_file *file_priv, + struct drm_device *dev, + uint32_t handle); +int virtio_gpu_mode_dumb_mmap(struct drm_file *file_priv, + struct drm_device *dev, + uint32_t handle, uint64_t *offset_p); + +/* virtio_fb */ +#define VIRTIO_GPUFB_CONN_LIMIT 1 +int virtio_gpu_fbdev_init(struct virtio_gpu_device *vgdev); +void virtio_gpu_fbdev_fini(struct virtio_gpu_device *vgdev); +int virtio_gpu_surface_dirty(struct virtio_gpu_framebuffer *qfb, + struct drm_clip_rect *clips, + unsigned num_clips); +/* virtio vg */ +int virtio_gpu_alloc_vbufs(struct virtio_gpu_device *vgdev); +void virtio_gpu_free_vbufs(struct virtio_gpu_device *vgdev); +void virtio_gpu_resource_id_get(struct virtio_gpu_device *vgdev, + uint32_t *resid); +void virtio_gpu_resource_id_put(struct virtio_gpu_device *vgdev, uint32_t id); +void virtio_gpu_cmd_create_resource(struct virtio_gpu_device *vgdev, + uint32_t resource_id, + uint32_t format, + uint32_t width, + uint32_t height); +void virtio_gpu_cmd_unref_resource(struct virtio_gpu_device *vgdev, + uint32_t resource_id); +void virtio_gpu_cmd_transfer_to_host_2d(struct virtio_gpu_device *vgdev, + uint32_t resource_id, uint64_t offset, + __le32 width, __le32 height, + __le32 x, __le32 y, + struct virtio_gpu_fence **fence); +void virtio_gpu_cmd_resource_flush(struct virtio_gpu_device *vgdev, + uint32_t resource_id, + uint32_t x, uint32_t y, + uint32_t width, uint32_t height); +void virtio_gpu_cmd_set_scanout(struct virtio_gpu_device *vgdev, + uint32_t 
scanout_id, uint32_t resource_id, + uint32_t width, uint32_t height, + uint32_t x, uint32_t y); +int virtio_gpu_object_attach(struct virtio_gpu_device *vgdev, + struct virtio_gpu_object *obj, + uint32_t resource_id, + struct virtio_gpu_fence **fence); +int virtio_gpu_attach_status_page(struct virtio_gpu_device *vgdev); +int virtio_gpu_detach_status_page(struct virtio_gpu_device *vgdev); +void virtio_gpu_cursor_ping(struct virtio_gpu_device *vgdev, + struct virtio_gpu_output *output); +int virtio_gpu_cmd_get_display_info(struct virtio_gpu_device *vgdev); +void virtio_gpu_cmd_resource_inval_backing(struct virtio_gpu_device *vgdev, + uint32_t resource_id); +void virtio_gpu_ctrl_ack(struct virtqueue *vq); +void virtio_gpu_cursor_ack(struct virtqueue *vq); +void virtio_gpu_dequeue_ctrl_func(struct work_struct *work); +void virtio_gpu_dequeue_cursor_func(struct work_struct *work); + +/* virtio_gpu_display.c */ +int virtio_gpu_framebuffer_init(struct drm_device *dev, + struct virtio_gpu_framebuffer *vgfb, + struct drm_mode_fb_cmd2 *mode_cmd, + struct drm_gem_object *obj); +int virtio_gpu_modeset_init(struct virtio_gpu_device *vgdev); +void virtio_gpu_modeset_fini(struct virtio_gpu_device *vgdev); + +/* virtio_gpu_plane.c */ +struct drm_plane *virtio_gpu_plane_init(struct virtio_gpu_device *vgdev, + int index); + +/* virtio_gpu_ttm.c */ +int virtio_gpu_ttm_init(struct virtio_gpu_device *vgdev); +void virtio_gpu_ttm_fini(struct virtio_gpu_device *vgdev); +int virtio_gpu_mmap(struct file *filp, struct vm_area_struct *vma); + +/* virtio_gpu_fence.c */ +int virtio_gpu_fence_emit(struct virtio_gpu_device *vgdev, + struct virtio_gpu_ctrl_hdr *cmd_hdr, + struct virtio_gpu_fence **fence); +void virtio_gpu_fence_event_process(struct virtio_gpu_device *vdev, + u64 last_seq); + +/* virtio_gpu_object */ +int virtio_gpu_object_create(struct virtio_gpu_device *vgdev, + unsigned long size, bool kernel, bool pinned, + struct virtio_gpu_object **bo_ptr); +int virtio_gpu_object_kmap(struct virtio_gpu_object *bo, void **ptr); +int virtio_gpu_object_get_sg_table(struct virtio_gpu_device *qdev, + struct virtio_gpu_object *bo); +void virtio_gpu_object_free_sg_table(struct virtio_gpu_object *bo); +int virtio_gpu_object_wait(struct virtio_gpu_object *bo, bool no_wait); + +static inline struct virtio_gpu_object* +virtio_gpu_object_ref(struct virtio_gpu_object *bo) +{ + ttm_bo_reference(&bo->tbo); + return bo; +} + +static inline void virtio_gpu_object_unref(struct virtio_gpu_object **bo) +{ + struct ttm_buffer_object *tbo; + + if ((*bo) == NULL) + return; + tbo = &((*bo)->tbo); + ttm_bo_unref(&tbo); + if (tbo == NULL) + *bo = NULL; +} + +static inline u64 virtio_gpu_object_mmap_offset(struct virtio_gpu_object *bo) +{ + return drm_vma_node_offset_addr(&bo->tbo.vma_node); +} + +static inline int virtio_gpu_object_reserve(struct virtio_gpu_object *bo, + bool no_wait) +{ + int r; + + r = ttm_bo_reserve(&bo->tbo, true, no_wait, false, NULL); + if (unlikely(r != 0)) { + if (r != -ERESTARTSYS) { + struct virtio_gpu_device *qdev = + bo->gem_base.dev->dev_private; + dev_err(qdev->dev, "%p reserve failed\n", bo); + } + return r; + } + return 0; +} + +static inline void virtio_gpu_object_unreserve(struct virtio_gpu_object *bo) +{ + ttm_bo_unreserve(&bo->tbo); +} + +/* virgl debufs */ +int virtio_gpu_debugfs_init(struct drm_minor *minor); +void virtio_gpu_debugfs_takedown(struct drm_minor *minor); + +#endif diff --git a/drivers/gpu/drm/virtio/virtgpu_fb.c b/drivers/gpu/drm/virtio/virtgpu_fb.c new file mode 100644 index 
000000000000..25bf333d5a45 --- /dev/null +++ b/drivers/gpu/drm/virtio/virtgpu_fb.c @@ -0,0 +1,431 @@ +/* + * Copyright (C) 2015 Red Hat, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include "virtgpu_drv.h" + +#define VIRTIO_GPU_FBCON_POLL_PERIOD (HZ / 60) + +struct virtio_gpu_fbdev { + struct drm_fb_helper helper; + struct virtio_gpu_framebuffer vgfb; + struct list_head fbdev_list; + struct virtio_gpu_device *vgdev; + struct delayed_work work; +}; + +static int virtio_gpu_dirty_update(struct virtio_gpu_framebuffer *fb, + bool store, int x, int y, + int width, int height) +{ + struct drm_device *dev = fb->base.dev; + struct virtio_gpu_device *vgdev = dev->dev_private; + bool store_for_later = false; + int bpp = fb->base.bits_per_pixel / 8; + int x2, y2; + unsigned long flags; + struct virtio_gpu_object *obj = gem_to_virtio_gpu_obj(fb->obj); + + if ((width <= 0) || + (x + width > fb->base.width) || + (y + height > fb->base.height)) { + DRM_DEBUG("values out of range %dx%d+%d+%d, fb %dx%d\n", + width, height, x, y, + fb->base.width, fb->base.height); + return -EINVAL; + } + + /* + * Can be called with pretty much any context (console output + * path). If we are in atomic just store the dirty rect info + * to send out the update later. + * + * Can't test inside spin lock. 
+ */ + if (in_atomic() || store) + store_for_later = true; + + x2 = x + width - 1; + y2 = y + height - 1; + + spin_lock_irqsave(&fb->dirty_lock, flags); + + if (fb->y1 < y) + y = fb->y1; + if (fb->y2 > y2) + y2 = fb->y2; + if (fb->x1 < x) + x = fb->x1; + if (fb->x2 > x2) + x2 = fb->x2; + + if (store_for_later) { + fb->x1 = x; + fb->x2 = x2; + fb->y1 = y; + fb->y2 = y2; + spin_unlock_irqrestore(&fb->dirty_lock, flags); + return 0; + } + + fb->x1 = fb->y1 = INT_MAX; + fb->x2 = fb->y2 = 0; + + spin_unlock_irqrestore(&fb->dirty_lock, flags); + + { + uint32_t offset; + uint32_t w = x2 - x + 1; + uint32_t h = y2 - y + 1; + + offset = (y * fb->base.pitches[0]) + x * bpp; + + virtio_gpu_cmd_transfer_to_host_2d(vgdev, obj->hw_res_handle, + offset, + cpu_to_le32(w), + cpu_to_le32(h), + cpu_to_le32(x), + cpu_to_le32(y), + NULL); + + } + virtio_gpu_cmd_resource_flush(vgdev, obj->hw_res_handle, + x, y, x2 - x + 1, y2 - y + 1); + return 0; +} + +int virtio_gpu_surface_dirty(struct virtio_gpu_framebuffer *vgfb, + struct drm_clip_rect *clips, + unsigned num_clips) +{ + struct virtio_gpu_device *vgdev = vgfb->base.dev->dev_private; + struct virtio_gpu_object *obj = gem_to_virtio_gpu_obj(vgfb->obj); + struct drm_clip_rect norect; + struct drm_clip_rect *clips_ptr; + int left, right, top, bottom; + int i; + int inc = 1; + if (!num_clips) { + num_clips = 1; + clips = &norect; + norect.x1 = norect.y1 = 0; + norect.x2 = vgfb->base.width; + norect.y2 = vgfb->base.height; + } + left = clips->x1; + right = clips->x2; + top = clips->y1; + bottom = clips->y2; + + /* skip the first clip rect */ + for (i = 1, clips_ptr = clips + inc; + i < num_clips; i++, clips_ptr += inc) { + left = min_t(int, left, (int)clips_ptr->x1); + right = max_t(int, right, (int)clips_ptr->x2); + top = min_t(int, top, (int)clips_ptr->y1); + bottom = max_t(int, bottom, (int)clips_ptr->y2); + } + + if (obj->dumb) + return virtio_gpu_dirty_update(vgfb, false, left, top, + right - left, bottom - top); + + virtio_gpu_cmd_resource_flush(vgdev, obj->hw_res_handle, + left, top, right - left, bottom - top); + return 0; +} + +static void virtio_gpu_fb_dirty_work(struct work_struct *work) +{ + struct delayed_work *delayed_work = to_delayed_work(work); + struct virtio_gpu_fbdev *vfbdev = + container_of(delayed_work, struct virtio_gpu_fbdev, work); + struct virtio_gpu_framebuffer *vgfb = &vfbdev->vgfb; + + virtio_gpu_dirty_update(&vfbdev->vgfb, false, vgfb->x1, vgfb->y1, + vgfb->x2 - vgfb->x1, vgfb->y2 - vgfb->y1); +} + +static void virtio_gpu_3d_fillrect(struct fb_info *info, + const struct fb_fillrect *rect) +{ + struct virtio_gpu_fbdev *vfbdev = info->par; + sys_fillrect(info, rect); + virtio_gpu_dirty_update(&vfbdev->vgfb, true, rect->dx, rect->dy, + rect->width, rect->height); + schedule_delayed_work(&vfbdev->work, VIRTIO_GPU_FBCON_POLL_PERIOD); +} + +static void virtio_gpu_3d_copyarea(struct fb_info *info, + const struct fb_copyarea *area) +{ + struct virtio_gpu_fbdev *vfbdev = info->par; + sys_copyarea(info, area); + virtio_gpu_dirty_update(&vfbdev->vgfb, true, area->dx, area->dy, + area->width, area->height); + schedule_delayed_work(&vfbdev->work, VIRTIO_GPU_FBCON_POLL_PERIOD); +} + +static void virtio_gpu_3d_imageblit(struct fb_info *info, + const struct fb_image *image) +{ + struct virtio_gpu_fbdev *vfbdev = info->par; + sys_imageblit(info, image); + virtio_gpu_dirty_update(&vfbdev->vgfb, true, image->dx, image->dy, + image->width, image->height); + schedule_delayed_work(&vfbdev->work, VIRTIO_GPU_FBCON_POLL_PERIOD); +} + +static struct fb_ops 
virtio_gpufb_ops = { + .owner = THIS_MODULE, + .fb_check_var = drm_fb_helper_check_var, + .fb_set_par = drm_fb_helper_set_par, /* TODO: copy vmwgfx */ + .fb_fillrect = virtio_gpu_3d_fillrect, + .fb_copyarea = virtio_gpu_3d_copyarea, + .fb_imageblit = virtio_gpu_3d_imageblit, + .fb_pan_display = drm_fb_helper_pan_display, + .fb_blank = drm_fb_helper_blank, + .fb_setcmap = drm_fb_helper_setcmap, + .fb_debug_enter = drm_fb_helper_debug_enter, + .fb_debug_leave = drm_fb_helper_debug_leave, +}; + +static int virtio_gpu_vmap_fb(struct virtio_gpu_device *vgdev, + struct virtio_gpu_object *obj) +{ + return virtio_gpu_object_kmap(obj, NULL); +} + +static int virtio_gpufb_create(struct drm_fb_helper *helper, + struct drm_fb_helper_surface_size *sizes) +{ + struct virtio_gpu_fbdev *vfbdev = + container_of(helper, struct virtio_gpu_fbdev, helper); + struct drm_device *dev = helper->dev; + struct virtio_gpu_device *vgdev = dev->dev_private; + struct fb_info *info; + struct drm_framebuffer *fb; + struct drm_mode_fb_cmd2 mode_cmd = {}; + struct virtio_gpu_object *obj; + struct device *device = vgdev->dev; + uint32_t resid, format, size; + int ret; + + mode_cmd.width = sizes->surface_width; + mode_cmd.height = sizes->surface_height; + mode_cmd.pitches[0] = mode_cmd.width * 4; + mode_cmd.pixel_format = drm_mode_legacy_fb_format(32, 24); + + switch (mode_cmd.pixel_format) { +#ifdef __BIG_ENDIAN + case DRM_FORMAT_XRGB8888: + format = VIRTIO_GPU_FORMAT_X8R8G8B8_UNORM; + break; + case DRM_FORMAT_ARGB8888: + format = VIRTIO_GPU_FORMAT_A8R8G8B8_UNORM; + break; + case DRM_FORMAT_BGRX8888: + format = VIRTIO_GPU_FORMAT_B8G8R8X8_UNORM; + break; + case DRM_FORMAT_BGRA8888: + format = VIRTIO_GPU_FORMAT_B8G8R8A8_UNORM; + break; + case DRM_FORMAT_RGBX8888: + format = VIRTIO_GPU_FORMAT_R8G8B8X8_UNORM; + break; + case DRM_FORMAT_RGBA8888: + format = VIRTIO_GPU_FORMAT_R8G8B8A8_UNORM; + break; + case DRM_FORMAT_XBGR8888: + format = VIRTIO_GPU_FORMAT_X8B8G8R8_UNORM; + break; + case DRM_FORMAT_ABGR8888: + format = VIRTIO_GPU_FORMAT_A8B8G8R8_UNORM; + break; +#else + case DRM_FORMAT_XRGB8888: + format = VIRTIO_GPU_FORMAT_B8G8R8X8_UNORM; + break; + case DRM_FORMAT_ARGB8888: + format = VIRTIO_GPU_FORMAT_B8G8R8A8_UNORM; + break; + case DRM_FORMAT_BGRX8888: + format = VIRTIO_GPU_FORMAT_X8R8G8B8_UNORM; + break; + case DRM_FORMAT_BGRA8888: + format = VIRTIO_GPU_FORMAT_A8R8G8B8_UNORM; + break; + case DRM_FORMAT_RGBX8888: + format = VIRTIO_GPU_FORMAT_X8B8G8R8_UNORM; + break; + case DRM_FORMAT_RGBA8888: + format = VIRTIO_GPU_FORMAT_A8B8G8R8_UNORM; + break; + case DRM_FORMAT_XBGR8888: + format = VIRTIO_GPU_FORMAT_R8G8B8X8_UNORM; + break; + case DRM_FORMAT_ABGR8888: + format = VIRTIO_GPU_FORMAT_R8G8B8A8_UNORM; + break; +#endif + default: + DRM_ERROR("failed to find virtio gpu format for %d\n", + mode_cmd.pixel_format); + return -EINVAL; + } + + size = mode_cmd.pitches[0] * mode_cmd.height; + obj = virtio_gpu_alloc_object(dev, size, false, true); + if (!obj) + return -ENOMEM; + + virtio_gpu_resource_id_get(vgdev, &resid); + virtio_gpu_cmd_create_resource(vgdev, resid, format, + mode_cmd.width, mode_cmd.height); + + ret = virtio_gpu_vmap_fb(vgdev, obj); + if (ret) { + DRM_ERROR("failed to vmap fb %d\n", ret); + goto err_obj_vmap; + } + + /* attach the object to the resource */ + ret = virtio_gpu_object_attach(vgdev, obj, resid, NULL); + if (ret) + goto err_obj_attach; + + info = framebuffer_alloc(0, device); + if (!info) { + ret = -ENOMEM; + goto err_fb_alloc; + } + + ret = fb_alloc_cmap(&info->cmap, 256, 0); + if (ret) { + ret = -ENOMEM; + 
goto err_fb_alloc_cmap; + } + + info->par = helper; + + ret = virtio_gpu_framebuffer_init(dev, &vfbdev->vgfb, + &mode_cmd, &obj->gem_base); + if (ret) + goto err_fb_init; + + fb = &vfbdev->vgfb.base; + + vfbdev->helper.fb = fb; + vfbdev->helper.fbdev = info; + + strcpy(info->fix.id, "virtiodrmfb"); + info->flags = FBINFO_DEFAULT; + info->fbops = &virtio_gpufb_ops; + info->pixmap.flags = FB_PIXMAP_SYSTEM; + + info->screen_base = obj->vmap; + info->screen_size = obj->gem_base.size; + drm_fb_helper_fill_fix(info, fb->pitches[0], fb->depth); + drm_fb_helper_fill_var(info, &vfbdev->helper, + sizes->fb_width, sizes->fb_height); + + info->fix.mmio_start = 0; + info->fix.mmio_len = 0; + return 0; + +err_fb_init: + fb_dealloc_cmap(&info->cmap); +err_fb_alloc_cmap: + framebuffer_release(info); +err_fb_alloc: + virtio_gpu_cmd_resource_inval_backing(vgdev, resid); +err_obj_attach: +err_obj_vmap: + virtio_gpu_gem_free_object(&obj->gem_base); + return ret; +} + +static int virtio_gpu_fbdev_destroy(struct drm_device *dev, + struct virtio_gpu_fbdev *vgfbdev) +{ + struct fb_info *info; + struct virtio_gpu_framebuffer *vgfb = &vgfbdev->vgfb; + + if (vgfbdev->helper.fbdev) { + info = vgfbdev->helper.fbdev; + + unregister_framebuffer(info); + framebuffer_release(info); + } + if (vgfb->obj) + vgfb->obj = NULL; + drm_fb_helper_fini(&vgfbdev->helper); + drm_framebuffer_cleanup(&vgfb->base); + + return 0; +} +static struct drm_fb_helper_funcs virtio_gpu_fb_helper_funcs = { + .fb_probe = virtio_gpufb_create, +}; + +int virtio_gpu_fbdev_init(struct virtio_gpu_device *vgdev) +{ + struct virtio_gpu_fbdev *vgfbdev; + int bpp_sel = 32; /* TODO: parameter from somewhere? */ + int ret; + + vgfbdev = kzalloc(sizeof(struct virtio_gpu_fbdev), GFP_KERNEL); + if (!vgfbdev) + return -ENOMEM; + + vgfbdev->vgdev = vgdev; + vgdev->vgfbdev = vgfbdev; + INIT_DELAYED_WORK(&vgfbdev->work, virtio_gpu_fb_dirty_work); + + drm_fb_helper_prepare(vgdev->ddev, &vgfbdev->helper, + &virtio_gpu_fb_helper_funcs); + ret = drm_fb_helper_init(vgdev->ddev, &vgfbdev->helper, + vgdev->num_scanouts, + VIRTIO_GPUFB_CONN_LIMIT); + if (ret) { + kfree(vgfbdev); + return ret; + } + + drm_fb_helper_single_add_all_connectors(&vgfbdev->helper); + drm_fb_helper_initial_config(&vgfbdev->helper, bpp_sel); + return 0; +} + +void virtio_gpu_fbdev_fini(struct virtio_gpu_device *vgdev) +{ + if (!vgdev->vgfbdev) + return; + + virtio_gpu_fbdev_destroy(vgdev->ddev, vgdev->vgfbdev); + kfree(vgdev->vgfbdev); + vgdev->vgfbdev = NULL; +} diff --git a/drivers/gpu/drm/virtio/virtgpu_fence.c b/drivers/gpu/drm/virtio/virtgpu_fence.c new file mode 100644 index 000000000000..1da632631dac --- /dev/null +++ b/drivers/gpu/drm/virtio/virtgpu_fence.c @@ -0,0 +1,119 @@ +/* + * Copyright (C) 2015 Red Hat, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include +#include "virtgpu_drv.h" + +static const char *virtio_get_driver_name(struct fence *f) +{ + return "virtio_gpu"; +} + +static const char *virtio_get_timeline_name(struct fence *f) +{ + return "controlq"; +} + +static bool virtio_enable_signaling(struct fence *f) +{ + return true; +} + +static bool virtio_signaled(struct fence *f) +{ + struct virtio_gpu_fence *fence = to_virtio_fence(f); + + if (atomic64_read(&fence->drv->last_seq) >= fence->seq) + return true; + return false; +} + +static void virtio_fence_value_str(struct fence *f, char *str, int size) +{ + struct virtio_gpu_fence *fence = to_virtio_fence(f); + + snprintf(str, size, "%llu", fence->seq); +} + +static void virtio_timeline_value_str(struct fence *f, char *str, int size) +{ + struct virtio_gpu_fence *fence = to_virtio_fence(f); + + snprintf(str, size, "%lu", atomic64_read(&fence->drv->last_seq)); +} + +static const struct fence_ops virtio_fence_ops = { + .get_driver_name = virtio_get_driver_name, + .get_timeline_name = virtio_get_timeline_name, + .enable_signaling = virtio_enable_signaling, + .signaled = virtio_signaled, + .wait = fence_default_wait, + .fence_value_str = virtio_fence_value_str, + .timeline_value_str = virtio_timeline_value_str, +}; + +int virtio_gpu_fence_emit(struct virtio_gpu_device *vgdev, + struct virtio_gpu_ctrl_hdr *cmd_hdr, + struct virtio_gpu_fence **fence) +{ + struct virtio_gpu_fence_driver *drv = &vgdev->fence_drv; + unsigned long irq_flags; + + *fence = kmalloc(sizeof(struct virtio_gpu_fence), GFP_KERNEL); + if ((*fence) == NULL) + return -ENOMEM; + + spin_lock_irqsave(&drv->lock, irq_flags); + (*fence)->drv = drv; + (*fence)->seq = ++drv->sync_seq; + fence_init(&(*fence)->f, &virtio_fence_ops, &drv->lock, + 0, (*fence)->seq); + fence_get(&(*fence)->f); + list_add_tail(&(*fence)->node, &drv->fences); + spin_unlock_irqrestore(&drv->lock, irq_flags); + + cmd_hdr->flags |= cpu_to_le32(VIRTIO_GPU_FLAG_FENCE); + cmd_hdr->fence_id = cpu_to_le64((*fence)->seq); + return 0; +} + +void virtio_gpu_fence_event_process(struct virtio_gpu_device *vgdev, + u64 last_seq) +{ + struct virtio_gpu_fence_driver *drv = &vgdev->fence_drv; + struct virtio_gpu_fence *fence, *tmp; + unsigned long irq_flags; + + spin_lock_irqsave(&drv->lock, irq_flags); + atomic64_set(&vgdev->fence_drv.last_seq, last_seq); + list_for_each_entry_safe(fence, tmp, &drv->fences, node) { + if (last_seq < fence->seq) + continue; + fence_signal_locked(&fence->f); + list_del(&fence->node); + fence_put(&fence->f); + } + spin_unlock_irqrestore(&drv->lock, irq_flags); +} diff --git a/drivers/gpu/drm/virtio/virtgpu_gem.c b/drivers/gpu/drm/virtio/virtgpu_gem.c new file mode 100644 index 000000000000..cfa0d27150bd --- /dev/null +++ b/drivers/gpu/drm/virtio/virtgpu_gem.c @@ -0,0 +1,140 @@ +/* + * Copyright (C) 2015 Red Hat, Inc. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include +#include "virtgpu_drv.h" + +void virtio_gpu_gem_free_object(struct drm_gem_object *gem_obj) +{ + struct virtio_gpu_object *obj = gem_to_virtio_gpu_obj(gem_obj); + + if (obj) + virtio_gpu_object_unref(&obj); +} + +struct virtio_gpu_object *virtio_gpu_alloc_object(struct drm_device *dev, + size_t size, bool kernel, + bool pinned) +{ + struct virtio_gpu_device *vgdev = dev->dev_private; + struct virtio_gpu_object *obj; + int ret; + + ret = virtio_gpu_object_create(vgdev, size, kernel, pinned, &obj); + if (ret) + return ERR_PTR(ret); + + return obj; +} + +int virtio_gpu_gem_create(struct drm_file *file, + struct drm_device *dev, + uint64_t size, + struct drm_gem_object **obj_p, + uint32_t *handle_p) +{ + struct virtio_gpu_object *obj; + int ret; + u32 handle; + + obj = virtio_gpu_alloc_object(dev, size, false, false); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + ret = drm_gem_handle_create(file, &obj->gem_base, &handle); + if (ret) { + drm_gem_object_release(&obj->gem_base); + return ret; + } + + *obj_p = &obj->gem_base; + + /* drop reference from allocate - handle holds it now */ + drm_gem_object_unreference_unlocked(&obj->gem_base); + + *handle_p = handle; + return 0; +} + +int virtio_gpu_mode_dumb_create(struct drm_file *file_priv, + struct drm_device *dev, + struct drm_mode_create_dumb *args) +{ + struct virtio_gpu_device *vgdev = dev->dev_private; + struct drm_gem_object *gobj; + struct virtio_gpu_object *obj; + int ret; + uint32_t pitch; + uint32_t resid; + + pitch = args->width * ((args->bpp + 1) / 8); + args->size = pitch * args->height; + args->size = ALIGN(args->size, PAGE_SIZE); + + ret = virtio_gpu_gem_create(file_priv, dev, args->size, &gobj, + &args->handle); + if (ret) + goto fail; + + virtio_gpu_resource_id_get(vgdev, &resid); + virtio_gpu_cmd_create_resource(vgdev, resid, + 2, args->width, args->height); + + /* attach the object to the resource */ + obj = gem_to_virtio_gpu_obj(gobj); + ret = virtio_gpu_object_attach(vgdev, obj, resid, NULL); + if (ret) + goto fail; + + obj->dumb = true; + args->pitch = pitch; + return ret; + +fail: + return ret; +} + +int virtio_gpu_mode_dumb_destroy(struct drm_file *file_priv, + struct drm_device *dev, + uint32_t handle) +{ + return drm_gem_handle_delete(file_priv, handle); +} + +int virtio_gpu_mode_dumb_mmap(struct drm_file *file_priv, + struct drm_device *dev, + uint32_t handle, uint64_t *offset_p) +{ 
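	/* Dumb-buffer mmap helper: resolve the GEM handle to the backing
	 * virtio_gpu_object and hand back the fake mmap offset taken from its
	 * TTM VMA node; userspace then passes that offset to mmap() on the
	 * DRM fd. */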
+ struct drm_gem_object *gobj; + struct virtio_gpu_object *obj; + BUG_ON(!offset_p); + gobj = drm_gem_object_lookup(dev, file_priv, handle); + if (gobj == NULL) + return -ENOENT; + obj = gem_to_virtio_gpu_obj(gobj); + *offset_p = virtio_gpu_object_mmap_offset(obj); + drm_gem_object_unreference_unlocked(gobj); + return 0; +} diff --git a/drivers/gpu/drm/virtio/virtgpu_kms.c b/drivers/gpu/drm/virtio/virtgpu_kms.c new file mode 100644 index 000000000000..132405f15389 --- /dev/null +++ b/drivers/gpu/drm/virtio/virtgpu_kms.c @@ -0,0 +1,173 @@ +/* + * Copyright (C) 2015 Red Hat, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include "virtgpu_drv.h" + +static int virtio_gpu_fbdev = 1; + +MODULE_PARM_DESC(fbdev, "Disable/Enable framebuffer device & console"); +module_param_named(fbdev, virtio_gpu_fbdev, int, 0400); + +static void virtio_gpu_config_changed_work_func(struct work_struct *work) +{ + struct virtio_gpu_device *vgdev = + container_of(work, struct virtio_gpu_device, + config_changed_work); + u32 events_read, events_clear = 0; + + /* read the config space */ + virtio_cread(vgdev->vdev, struct virtio_gpu_config, + events_read, &events_read); + if (events_read & VIRTIO_GPU_EVENT_DISPLAY) { + virtio_gpu_cmd_get_display_info(vgdev); + drm_helper_hpd_irq_event(vgdev->ddev); + events_clear |= VIRTIO_GPU_EVENT_DISPLAY; + } + virtio_cwrite(vgdev->vdev, struct virtio_gpu_config, + events_clear, &events_clear); +} + +static void virtio_gpu_init_vq(struct virtio_gpu_queue *vgvq, + void (*work_func)(struct work_struct *work)) +{ + spin_lock_init(&vgvq->qlock); + init_waitqueue_head(&vgvq->ack_queue); + INIT_WORK(&vgvq->dequeue_work, work_func); +} + +int virtio_gpu_driver_load(struct drm_device *dev, unsigned long flags) +{ + static vq_callback_t *callbacks[] = { + virtio_gpu_ctrl_ack, virtio_gpu_cursor_ack + }; + static const char *names[] = { "control", "cursor" }; + + struct virtio_gpu_device *vgdev; + /* this will expand later */ + struct virtqueue *vqs[2]; + u32 num_scanouts; + int ret; + + if (!virtio_has_feature(dev->virtdev, VIRTIO_F_VERSION_1)) + return -ENODEV; + + vgdev = kzalloc(sizeof(struct virtio_gpu_device), GFP_KERNEL); + if (!vgdev) + return -ENOMEM; + + vgdev->ddev = dev; + dev->dev_private = vgdev; + vgdev->vdev = dev->virtdev; + vgdev->dev = dev->dev; + + spin_lock_init(&vgdev->display_info_lock); + 
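	/* The remaining per-device state (ID allocators, response wait queue,
	 * the two virtqueue wrappers, fence driver, config-changed work) is set
	 * up before find_vqs() below, so the dequeue workers and response
	 * callbacks never observe half-initialized state. */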
spin_lock_init(&vgdev->ctx_id_idr_lock); + idr_init(&vgdev->ctx_id_idr); + spin_lock_init(&vgdev->resource_idr_lock); + idr_init(&vgdev->resource_idr); + init_waitqueue_head(&vgdev->resp_wq); + virtio_gpu_init_vq(&vgdev->ctrlq, virtio_gpu_dequeue_ctrl_func); + virtio_gpu_init_vq(&vgdev->cursorq, virtio_gpu_dequeue_cursor_func); + + spin_lock_init(&vgdev->fence_drv.lock); + INIT_LIST_HEAD(&vgdev->fence_drv.fences); + INIT_WORK(&vgdev->config_changed_work, + virtio_gpu_config_changed_work_func); + + ret = vgdev->vdev->config->find_vqs(vgdev->vdev, 2, vqs, + callbacks, names); + if (ret) { + DRM_ERROR("failed to find virt queues\n"); + goto err_vqs; + } + vgdev->ctrlq.vq = vqs[0]; + vgdev->cursorq.vq = vqs[1]; + ret = virtio_gpu_alloc_vbufs(vgdev); + if (ret) { + DRM_ERROR("failed to alloc vbufs\n"); + goto err_vbufs; + } + + ret = virtio_gpu_ttm_init(vgdev); + if (ret) { + DRM_ERROR("failed to init ttm %d\n", ret); + goto err_ttm; + } + + /* get display info */ + virtio_cread(vgdev->vdev, struct virtio_gpu_config, + num_scanouts, &num_scanouts); + vgdev->num_scanouts = min_t(uint32_t, num_scanouts, + VIRTIO_GPU_MAX_SCANOUTS); + if (!vgdev->num_scanouts) { + DRM_ERROR("num_scanouts is zero\n"); + ret = -EINVAL; + goto err_scanouts; + } + + ret = virtio_gpu_modeset_init(vgdev); + if (ret) + goto err_modeset; + + virtio_device_ready(vgdev->vdev); + vgdev->vqs_ready = true; + + if (virtio_gpu_fbdev) + virtio_gpu_fbdev_init(vgdev); + virtio_gpu_cmd_get_display_info(vgdev); + + return 0; + +err_modeset: +err_scanouts: + virtio_gpu_ttm_fini(vgdev); +err_ttm: + virtio_gpu_free_vbufs(vgdev); +err_vbufs: + vgdev->vdev->config->del_vqs(vgdev->vdev); +err_vqs: + kfree(vgdev); + return ret; +} + +int virtio_gpu_driver_unload(struct drm_device *dev) +{ + struct virtio_gpu_device *vgdev = dev->dev_private; + + vgdev->vqs_ready = false; + flush_work(&vgdev->ctrlq.dequeue_work); + flush_work(&vgdev->cursorq.dequeue_work); + flush_work(&vgdev->config_changed_work); + vgdev->vdev->config->del_vqs(vgdev->vdev); + + virtio_gpu_modeset_fini(vgdev); + virtio_gpu_ttm_fini(vgdev); + virtio_gpu_free_vbufs(vgdev); + kfree(vgdev); + return 0; +} diff --git a/drivers/gpu/drm/virtio/virtgpu_object.c b/drivers/gpu/drm/virtio/virtgpu_object.c new file mode 100644 index 000000000000..2c624c784c1d --- /dev/null +++ b/drivers/gpu/drm/virtio/virtgpu_object.c @@ -0,0 +1,170 @@ +/* + * Copyright (C) 2015 Red Hat, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "virtgpu_drv.h" + +static void virtio_gpu_ttm_bo_destroy(struct ttm_buffer_object *tbo) +{ + struct virtio_gpu_object *bo; + struct virtio_gpu_device *vgdev; + + bo = container_of(tbo, struct virtio_gpu_object, tbo); + vgdev = (struct virtio_gpu_device *)bo->gem_base.dev->dev_private; + + if (bo->hw_res_handle) + virtio_gpu_cmd_unref_resource(vgdev, bo->hw_res_handle); + if (bo->pages) + virtio_gpu_object_free_sg_table(bo); + drm_gem_object_release(&bo->gem_base); + kfree(bo); +} + +static void virtio_gpu_init_ttm_placement(struct virtio_gpu_object *vgbo, + bool pinned) +{ + u32 c = 1; + u32 pflag = pinned ? TTM_PL_FLAG_NO_EVICT : 0; + + vgbo->placement.placement = &vgbo->placement_code; + vgbo->placement.busy_placement = &vgbo->placement_code; + vgbo->placement_code.fpfn = 0; + vgbo->placement_code.lpfn = 0; + vgbo->placement_code.flags = + TTM_PL_MASK_CACHING | TTM_PL_FLAG_TT | pflag; + vgbo->placement.num_placement = c; + vgbo->placement.num_busy_placement = c; + +} + +int virtio_gpu_object_create(struct virtio_gpu_device *vgdev, + unsigned long size, bool kernel, bool pinned, + struct virtio_gpu_object **bo_ptr) +{ + struct virtio_gpu_object *bo; + enum ttm_bo_type type; + size_t acc_size; + int ret; + + if (kernel) + type = ttm_bo_type_kernel; + else + type = ttm_bo_type_device; + *bo_ptr = NULL; + + acc_size = ttm_bo_dma_acc_size(&vgdev->mman.bdev, size, + sizeof(struct virtio_gpu_object)); + + bo = kzalloc(sizeof(struct virtio_gpu_object), GFP_KERNEL); + if (bo == NULL) + return -ENOMEM; + size = roundup(size, PAGE_SIZE); + ret = drm_gem_object_init(vgdev->ddev, &bo->gem_base, size); + if (ret != 0) + goto err_gem_init; + bo->dumb = false; + virtio_gpu_init_ttm_placement(bo, pinned); + + ret = ttm_bo_init(&vgdev->mman.bdev, &bo->tbo, size, type, + &bo->placement, 0, !kernel, NULL, acc_size, + NULL, NULL, &virtio_gpu_ttm_bo_destroy); + if (ret != 0) + goto err_ttm_init; + + *bo_ptr = bo; + return 0; + +err_ttm_init: + drm_gem_object_release(&bo->gem_base); +err_gem_init: + kfree(bo); + return ret; +} + +int virtio_gpu_object_kmap(struct virtio_gpu_object *bo, void **ptr) +{ + bool is_iomem; + int r; + + if (bo->vmap) { + if (ptr) + *ptr = bo->vmap; + return 0; + } + r = ttm_bo_kmap(&bo->tbo, 0, bo->tbo.num_pages, &bo->kmap); + if (r) + return r; + bo->vmap = ttm_kmap_obj_virtual(&bo->kmap, &is_iomem); + if (ptr) + *ptr = bo->vmap; + return 0; +} + +int virtio_gpu_object_get_sg_table(struct virtio_gpu_device *qdev, + struct virtio_gpu_object *bo) +{ + int ret; + struct page **pages = bo->tbo.ttm->pages; + int nr_pages = bo->tbo.num_pages; + + /* wtf swapping */ + if (bo->pages) + return 0; + + if (bo->tbo.ttm->state == tt_unpopulated) + bo->tbo.ttm->bdev->driver->ttm_tt_populate(bo->tbo.ttm); + bo->pages = kmalloc(sizeof(struct sg_table), GFP_KERNEL); + if (!bo->pages) + goto out; + + ret = sg_alloc_table_from_pages(bo->pages, pages, nr_pages, 0, + nr_pages << PAGE_SHIFT, GFP_KERNEL); + if (ret) + goto out; + return 0; +out: + kfree(bo->pages); + bo->pages = NULL; + return -ENOMEM; +} + +void virtio_gpu_object_free_sg_table(struct virtio_gpu_object *bo) +{ + sg_free_table(bo->pages); + kfree(bo->pages); + bo->pages = NULL; +} + +int virtio_gpu_object_wait(struct virtio_gpu_object *bo, 
bool no_wait) +{ + int r; + + r = ttm_bo_reserve(&bo->tbo, true, no_wait, false, NULL); + if (unlikely(r != 0)) + return r; + r = ttm_bo_wait(&bo->tbo, true, true, no_wait); + ttm_bo_unreserve(&bo->tbo); + return r; +} + diff --git a/drivers/gpu/drm/virtio/virtgpu_plane.c b/drivers/gpu/drm/virtio/virtgpu_plane.c new file mode 100644 index 000000000000..4a74129c5708 --- /dev/null +++ b/drivers/gpu/drm/virtio/virtgpu_plane.c @@ -0,0 +1,120 @@ +/* + * Copyright (C) 2015 Red Hat, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "virtgpu_drv.h" +#include +#include + +static const uint32_t virtio_gpu_formats[] = { + DRM_FORMAT_XRGB8888, + DRM_FORMAT_ARGB8888, + DRM_FORMAT_BGRX8888, + DRM_FORMAT_BGRA8888, + DRM_FORMAT_RGBX8888, + DRM_FORMAT_RGBA8888, + DRM_FORMAT_XBGR8888, + DRM_FORMAT_ABGR8888, +}; + +static void virtio_gpu_plane_destroy(struct drm_plane *plane) +{ + kfree(plane); +} + +static const struct drm_plane_funcs virtio_gpu_plane_funcs = { + .update_plane = drm_atomic_helper_update_plane, + .disable_plane = drm_atomic_helper_disable_plane, + .destroy = virtio_gpu_plane_destroy, + .reset = drm_atomic_helper_plane_reset, + .atomic_duplicate_state = drm_atomic_helper_plane_duplicate_state, + .atomic_destroy_state = drm_atomic_helper_plane_destroy_state, +}; + +static int virtio_gpu_plane_atomic_check(struct drm_plane *plane, + struct drm_plane_state *state) +{ + return 0; +} + +static void virtio_gpu_plane_atomic_update(struct drm_plane *plane, + struct drm_plane_state *old_state) +{ + struct drm_device *dev = plane->dev; + struct virtio_gpu_device *vgdev = dev->dev_private; + struct virtio_gpu_output *output = drm_crtc_to_virtio_gpu_output(plane->crtc); + struct virtio_gpu_framebuffer *vgfb; + struct virtio_gpu_object *bo; + uint32_t handle; + + if (plane->fb) { + vgfb = to_virtio_gpu_framebuffer(plane->fb); + bo = gem_to_virtio_gpu_obj(vgfb->obj); + handle = bo->hw_res_handle; + } else { + handle = 0; + } + + DRM_DEBUG("handle 0x%x, crtc %dx%d+%d+%d\n", handle, + plane->state->crtc_w, plane->state->crtc_h, + plane->state->crtc_x, plane->state->crtc_y); + virtio_gpu_cmd_set_scanout(vgdev, output->index, handle, + plane->state->crtc_w, + plane->state->crtc_h, + plane->state->crtc_x, + plane->state->crtc_y); +} + + +static const struct drm_plane_helper_funcs virtio_gpu_plane_helper_funcs = { + .atomic_check = virtio_gpu_plane_atomic_check, + .atomic_update = 
virtio_gpu_plane_atomic_update, +}; + +struct drm_plane *virtio_gpu_plane_init(struct virtio_gpu_device *vgdev, + int index) +{ + struct drm_device *dev = vgdev->ddev; + struct drm_plane *plane; + int ret; + + plane = kzalloc(sizeof(*plane), GFP_KERNEL); + if (!plane) + return ERR_PTR(-ENOMEM); + + ret = drm_universal_plane_init(dev, plane, 1 << index, + &virtio_gpu_plane_funcs, + virtio_gpu_formats, + ARRAY_SIZE(virtio_gpu_formats), + DRM_PLANE_TYPE_PRIMARY); + if (ret) + goto err_plane_init; + + drm_plane_helper_add(plane, &virtio_gpu_plane_helper_funcs); + return plane; + +err_plane_init: + kfree(plane); + return ERR_PTR(ret); +} diff --git a/drivers/gpu/drm/virtio/virtgpu_ttm.c b/drivers/gpu/drm/virtio/virtgpu_ttm.c new file mode 100644 index 000000000000..e0e74c6bb959 --- /dev/null +++ b/drivers/gpu/drm/virtio/virtgpu_ttm.c @@ -0,0 +1,469 @@ +/* + * Copyright (C) 2015 Red Hat, Inc. + * All Rights Reserved. + * + * Authors: + * Dave Airlie + * Alon Levy + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include "virtgpu_drv.h" + +#include + +#define DRM_FILE_PAGE_OFFSET (0x100000000ULL >> PAGE_SHIFT) + +static struct +virtio_gpu_device *virtio_gpu_get_vgdev(struct ttm_bo_device *bdev) +{ + struct virtio_gpu_mman *mman; + struct virtio_gpu_device *vgdev; + + mman = container_of(bdev, struct virtio_gpu_mman, bdev); + vgdev = container_of(mman, struct virtio_gpu_device, mman); + return vgdev; +} + +static int virtio_gpu_ttm_mem_global_init(struct drm_global_reference *ref) +{ + return ttm_mem_global_init(ref->object); +} + +static void virtio_gpu_ttm_mem_global_release(struct drm_global_reference *ref) +{ + ttm_mem_global_release(ref->object); +} + +static int virtio_gpu_ttm_global_init(struct virtio_gpu_device *vgdev) +{ + struct drm_global_reference *global_ref; + int r; + + vgdev->mman.mem_global_referenced = false; + global_ref = &vgdev->mman.mem_global_ref; + global_ref->global_type = DRM_GLOBAL_TTM_MEM; + global_ref->size = sizeof(struct ttm_mem_global); + global_ref->init = &virtio_gpu_ttm_mem_global_init; + global_ref->release = &virtio_gpu_ttm_mem_global_release; + + r = drm_global_item_ref(global_ref); + if (r != 0) { + DRM_ERROR("Failed setting up TTM memory accounting " + "subsystem.\n"); + return r; + } + + vgdev->mman.bo_global_ref.mem_glob = + vgdev->mman.mem_global_ref.object; + global_ref = &vgdev->mman.bo_global_ref.ref; + global_ref->global_type = DRM_GLOBAL_TTM_BO; + global_ref->size = sizeof(struct ttm_bo_global); + global_ref->init = &ttm_bo_global_init; + global_ref->release = &ttm_bo_global_release; + r = drm_global_item_ref(global_ref); + if (r != 0) { + DRM_ERROR("Failed setting up TTM BO subsystem.\n"); + drm_global_item_unref(&vgdev->mman.mem_global_ref); + return r; + } + + vgdev->mman.mem_global_referenced = true; + return 0; +} + +static void virtio_gpu_ttm_global_fini(struct virtio_gpu_device *vgdev) +{ + if (vgdev->mman.mem_global_referenced) { + drm_global_item_unref(&vgdev->mman.bo_global_ref.ref); + drm_global_item_unref(&vgdev->mman.mem_global_ref); + vgdev->mman.mem_global_referenced = false; + } +} + +#if 0 +/* + * Hmm, seems to not do anything useful. Leftover debug hack? + * Something like printing pagefaults to kernel log? 
+ */ +static struct vm_operations_struct virtio_gpu_ttm_vm_ops; +static const struct vm_operations_struct *ttm_vm_ops; + +static int virtio_gpu_ttm_fault(struct vm_area_struct *vma, + struct vm_fault *vmf) +{ + struct ttm_buffer_object *bo; + struct virtio_gpu_device *vgdev; + int r; + + bo = (struct ttm_buffer_object *)vma->vm_private_data; + if (bo == NULL) + return VM_FAULT_NOPAGE; + vgdev = virtio_gpu_get_vgdev(bo->bdev); + r = ttm_vm_ops->fault(vma, vmf); + return r; +} +#endif + +int virtio_gpu_mmap(struct file *filp, struct vm_area_struct *vma) +{ + struct drm_file *file_priv; + struct virtio_gpu_device *vgdev; + int r; + + file_priv = filp->private_data; + vgdev = file_priv->minor->dev->dev_private; + if (vgdev == NULL) { + DRM_ERROR( + "filp->private_data->minor->dev->dev_private == NULL\n"); + return -EINVAL; + } + r = ttm_bo_mmap(filp, vma, &vgdev->mman.bdev); +#if 0 + if (unlikely(r != 0)) + return r; + if (unlikely(ttm_vm_ops == NULL)) { + ttm_vm_ops = vma->vm_ops; + virtio_gpu_ttm_vm_ops = *ttm_vm_ops; + virtio_gpu_ttm_vm_ops.fault = &virtio_gpu_ttm_fault; + } + vma->vm_ops = &virtio_gpu_ttm_vm_ops; + return 0; +#else + return r; +#endif +} + +static int virtio_gpu_invalidate_caches(struct ttm_bo_device *bdev, + uint32_t flags) +{ + return 0; +} + +static int ttm_bo_man_get_node(struct ttm_mem_type_manager *man, + struct ttm_buffer_object *bo, + const struct ttm_place *place, + struct ttm_mem_reg *mem) +{ + mem->mm_node = (void *)1; + return 0; +} + +static void ttm_bo_man_put_node(struct ttm_mem_type_manager *man, + struct ttm_mem_reg *mem) +{ + mem->mm_node = (void *)NULL; + return; +} + +static int ttm_bo_man_init(struct ttm_mem_type_manager *man, + unsigned long p_size) +{ + return 0; +} + +static int ttm_bo_man_takedown(struct ttm_mem_type_manager *man) +{ + return 0; +} + +static void ttm_bo_man_debug(struct ttm_mem_type_manager *man, + const char *prefix) +{ +} + +static const struct ttm_mem_type_manager_func virtio_gpu_bo_manager_func = { + ttm_bo_man_init, + ttm_bo_man_takedown, + ttm_bo_man_get_node, + ttm_bo_man_put_node, + ttm_bo_man_debug +}; + +static int virtio_gpu_init_mem_type(struct ttm_bo_device *bdev, uint32_t type, + struct ttm_mem_type_manager *man) +{ + struct virtio_gpu_device *vgdev; + + vgdev = virtio_gpu_get_vgdev(bdev); + + switch (type) { + case TTM_PL_SYSTEM: + /* System memory */ + man->flags = TTM_MEMTYPE_FLAG_MAPPABLE; + man->available_caching = TTM_PL_MASK_CACHING; + man->default_caching = TTM_PL_FLAG_CACHED; + break; + case TTM_PL_TT: + man->func = &virtio_gpu_bo_manager_func; + man->flags = TTM_MEMTYPE_FLAG_MAPPABLE; + man->available_caching = TTM_PL_MASK_CACHING; + man->default_caching = TTM_PL_FLAG_CACHED; + break; + default: + DRM_ERROR("Unsupported memory type %u\n", (unsigned)type); + return -EINVAL; + } + return 0; +} + +static void virtio_gpu_evict_flags(struct ttm_buffer_object *bo, + struct ttm_placement *placement) +{ + static struct ttm_place placements = { + .fpfn = 0, + .lpfn = 0, + .flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM, + }; + + placement->placement = &placements; + placement->busy_placement = &placements; + placement->num_placement = 1; + placement->num_busy_placement = 1; + return; +} + +static int virtio_gpu_verify_access(struct ttm_buffer_object *bo, + struct file *filp) +{ + return 0; +} + +static int virtio_gpu_ttm_io_mem_reserve(struct ttm_bo_device *bdev, + struct ttm_mem_reg *mem) +{ + struct ttm_mem_type_manager *man = &bdev->man[mem->mem_type]; + + mem->bus.addr = NULL; + mem->bus.offset = 0; + 
mem->bus.size = mem->num_pages << PAGE_SHIFT; + mem->bus.base = 0; + mem->bus.is_iomem = false; + if (!(man->flags & TTM_MEMTYPE_FLAG_MAPPABLE)) + return -EINVAL; + switch (mem->mem_type) { + case TTM_PL_SYSTEM: + case TTM_PL_TT: + /* system memory */ + return 0; + default: + return -EINVAL; + } + return 0; +} + +static void virtio_gpu_ttm_io_mem_free(struct ttm_bo_device *bdev, + struct ttm_mem_reg *mem) +{ +} + +/* + * TTM backend functions. + */ +struct virtio_gpu_ttm_tt { + struct ttm_dma_tt ttm; + struct virtio_gpu_device *vgdev; + u64 offset; +}; + +static int virtio_gpu_ttm_backend_bind(struct ttm_tt *ttm, + struct ttm_mem_reg *bo_mem) +{ + struct virtio_gpu_ttm_tt *gtt = (void *)ttm; + + gtt->offset = (unsigned long)(bo_mem->start << PAGE_SHIFT); + if (!ttm->num_pages) + WARN(1, "nothing to bind %lu pages for mreg %p back %p!\n", + ttm->num_pages, bo_mem, ttm); + + /* Not implemented */ + return 0; +} + +static int virtio_gpu_ttm_backend_unbind(struct ttm_tt *ttm) +{ + /* Not implemented */ + return 0; +} + +static void virtio_gpu_ttm_backend_destroy(struct ttm_tt *ttm) +{ + struct virtio_gpu_ttm_tt *gtt = (void *)ttm; + + ttm_dma_tt_fini(>t->ttm); + kfree(gtt); +} + +static struct ttm_backend_func virtio_gpu_backend_func = { + .bind = &virtio_gpu_ttm_backend_bind, + .unbind = &virtio_gpu_ttm_backend_unbind, + .destroy = &virtio_gpu_ttm_backend_destroy, +}; + +static int virtio_gpu_ttm_tt_populate(struct ttm_tt *ttm) +{ + if (ttm->state != tt_unpopulated) + return 0; + + return ttm_pool_populate(ttm); +} + +static void virtio_gpu_ttm_tt_unpopulate(struct ttm_tt *ttm) +{ + ttm_pool_unpopulate(ttm); +} + +static struct ttm_tt *virtio_gpu_ttm_tt_create(struct ttm_bo_device *bdev, + unsigned long size, + uint32_t page_flags, + struct page *dummy_read_page) +{ + struct virtio_gpu_device *vgdev; + struct virtio_gpu_ttm_tt *gtt; + + vgdev = virtio_gpu_get_vgdev(bdev); + gtt = kzalloc(sizeof(struct virtio_gpu_ttm_tt), GFP_KERNEL); + if (gtt == NULL) + return NULL; + gtt->ttm.ttm.func = &virtio_gpu_backend_func; + gtt->vgdev = vgdev; + if (ttm_dma_tt_init(>t->ttm, bdev, size, page_flags, + dummy_read_page)) { + kfree(gtt); + return NULL; + } + return >t->ttm.ttm; +} + +static void virtio_gpu_move_null(struct ttm_buffer_object *bo, + struct ttm_mem_reg *new_mem) +{ + struct ttm_mem_reg *old_mem = &bo->mem; + + BUG_ON(old_mem->mm_node != NULL); + *old_mem = *new_mem; + new_mem->mm_node = NULL; +} + +static int virtio_gpu_bo_move(struct ttm_buffer_object *bo, + bool evict, bool interruptible, + bool no_wait_gpu, + struct ttm_mem_reg *new_mem) +{ + virtio_gpu_move_null(bo, new_mem); + return 0; +} + +static void virtio_gpu_bo_move_notify(struct ttm_buffer_object *tbo, + struct ttm_mem_reg *new_mem) +{ + struct virtio_gpu_object *bo; + struct virtio_gpu_device *vgdev; + + bo = container_of(tbo, struct virtio_gpu_object, tbo); + vgdev = (struct virtio_gpu_device *)bo->gem_base.dev->dev_private; + + if (!new_mem || (new_mem->placement & TTM_PL_FLAG_SYSTEM)) { + if (bo->hw_res_handle) + virtio_gpu_cmd_resource_inval_backing(vgdev, + bo->hw_res_handle); + + } else if (new_mem->placement & TTM_PL_FLAG_TT) { + if (bo->hw_res_handle) { + virtio_gpu_object_attach(vgdev, bo, bo->hw_res_handle, + NULL); + } + } +} + +static void virtio_gpu_bo_swap_notify(struct ttm_buffer_object *tbo) +{ + struct virtio_gpu_object *bo; + struct virtio_gpu_device *vgdev; + + bo = container_of(tbo, struct virtio_gpu_object, tbo); + vgdev = (struct virtio_gpu_device *)bo->gem_base.dev->dev_private; + + if (bo->pages) + 
virtio_gpu_object_free_sg_table(bo); +} + +static struct ttm_bo_driver virtio_gpu_bo_driver = { + .ttm_tt_create = &virtio_gpu_ttm_tt_create, + .ttm_tt_populate = &virtio_gpu_ttm_tt_populate, + .ttm_tt_unpopulate = &virtio_gpu_ttm_tt_unpopulate, + .invalidate_caches = &virtio_gpu_invalidate_caches, + .init_mem_type = &virtio_gpu_init_mem_type, + .evict_flags = &virtio_gpu_evict_flags, + .move = &virtio_gpu_bo_move, + .verify_access = &virtio_gpu_verify_access, + .io_mem_reserve = &virtio_gpu_ttm_io_mem_reserve, + .io_mem_free = &virtio_gpu_ttm_io_mem_free, + .move_notify = &virtio_gpu_bo_move_notify, + .swap_notify = &virtio_gpu_bo_swap_notify, +}; + +int virtio_gpu_ttm_init(struct virtio_gpu_device *vgdev) +{ + int r; + + r = virtio_gpu_ttm_global_init(vgdev); + if (r) + return r; + /* No others user of address space so set it to 0 */ + r = ttm_bo_device_init(&vgdev->mman.bdev, + vgdev->mman.bo_global_ref.ref.object, + &virtio_gpu_bo_driver, + vgdev->ddev->anon_inode->i_mapping, + DRM_FILE_PAGE_OFFSET, 0); + if (r) { + DRM_ERROR("failed initializing buffer object driver(%d).\n", r); + goto err_dev_init; + return r; + } + + r = ttm_bo_init_mm(&vgdev->mman.bdev, TTM_PL_TT, 0); + if (r) { + DRM_ERROR("Failed initializing GTT heap.\n"); + goto err_mm_init; + return r; + } + return 0; + +err_mm_init: + ttm_bo_device_release(&vgdev->mman.bdev); +err_dev_init: + virtio_gpu_ttm_global_fini(vgdev); + return r; +} + +void virtio_gpu_ttm_fini(struct virtio_gpu_device *vgdev) +{ + ttm_bo_device_release(&vgdev->mman.bdev); + virtio_gpu_ttm_global_fini(vgdev); + DRM_INFO("virtio_gpu: ttm finalized\n"); +} diff --git a/drivers/gpu/drm/virtio/virtgpu_vq.c b/drivers/gpu/drm/virtio/virtgpu_vq.c new file mode 100644 index 000000000000..8fa6513eb3bc --- /dev/null +++ b/drivers/gpu/drm/virtio/virtgpu_vq.c @@ -0,0 +1,614 @@ +/* + * Copyright (C) 2015 Red Hat, Inc. + * All Rights Reserved. + * + * Authors: + * Dave Airlie + * Gerd Hoffmann + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#include +#include "virtgpu_drv.h" +#include +#include +#include + +#define MAX_INLINE_CMD_SIZE 96 +#define MAX_INLINE_RESP_SIZE 24 +#define VBUFFER_SIZE (sizeof(struct virtio_gpu_vbuffer) \ + + MAX_INLINE_CMD_SIZE \ + + MAX_INLINE_RESP_SIZE) + +void virtio_gpu_resource_id_get(struct virtio_gpu_device *vgdev, + uint32_t *resid) +{ + int handle; + + idr_preload(GFP_KERNEL); + spin_lock(&vgdev->resource_idr_lock); + handle = idr_alloc(&vgdev->resource_idr, NULL, 1, 0, GFP_NOWAIT); + spin_unlock(&vgdev->resource_idr_lock); + idr_preload_end(); + *resid = handle; +} + +void virtio_gpu_resource_id_put(struct virtio_gpu_device *vgdev, uint32_t id) +{ + spin_lock(&vgdev->resource_idr_lock); + idr_remove(&vgdev->resource_idr, id); + spin_unlock(&vgdev->resource_idr_lock); +} + +void virtio_gpu_ctrl_ack(struct virtqueue *vq) +{ + struct drm_device *dev = vq->vdev->priv; + struct virtio_gpu_device *vgdev = dev->dev_private; + schedule_work(&vgdev->ctrlq.dequeue_work); +} + +void virtio_gpu_cursor_ack(struct virtqueue *vq) +{ + struct drm_device *dev = vq->vdev->priv; + struct virtio_gpu_device *vgdev = dev->dev_private; + schedule_work(&vgdev->cursorq.dequeue_work); +} + +int virtio_gpu_alloc_vbufs(struct virtio_gpu_device *vgdev) +{ + struct virtio_gpu_vbuffer *vbuf; + int i, size, count = 0; + void *ptr; + + INIT_LIST_HEAD(&vgdev->free_vbufs); + count += virtqueue_get_vring_size(vgdev->ctrlq.vq); + count += virtqueue_get_vring_size(vgdev->cursorq.vq); + size = count * VBUFFER_SIZE; + DRM_INFO("virtio vbuffers: %d bufs, %zdB each, %dkB total.\n", + count, VBUFFER_SIZE, size / 1024); + + vgdev->vbufs = kzalloc(size, GFP_KERNEL); + if (!vgdev->vbufs) + return -ENOMEM; + + for (i = 0, ptr = vgdev->vbufs; + i < count; + i++, ptr += VBUFFER_SIZE) { + vbuf = ptr; + list_add(&vbuf->list, &vgdev->free_vbufs); + } + return 0; +} + +void virtio_gpu_free_vbufs(struct virtio_gpu_device *vgdev) +{ + struct virtio_gpu_vbuffer *vbuf; + int i, count = 0; + + count += virtqueue_get_vring_size(vgdev->ctrlq.vq); + count += virtqueue_get_vring_size(vgdev->cursorq.vq); + + for (i = 0; i < count; i++) { + if (WARN_ON(list_empty(&vgdev->free_vbufs))) + return; + vbuf = list_first_entry(&vgdev->free_vbufs, + struct virtio_gpu_vbuffer, list); + list_del(&vbuf->list); + } + kfree(vgdev->vbufs); +} + +static struct virtio_gpu_vbuffer* +virtio_gpu_get_vbuf(struct virtio_gpu_device *vgdev, + int size, int resp_size, void *resp_buf, + virtio_gpu_resp_cb resp_cb) +{ + struct virtio_gpu_vbuffer *vbuf; + + BUG_ON(list_empty(&vgdev->free_vbufs)); + vbuf = list_first_entry(&vgdev->free_vbufs, + struct virtio_gpu_vbuffer, list); + list_del(&vbuf->list); + memset(vbuf, 0, VBUFFER_SIZE); + + BUG_ON(size > MAX_INLINE_CMD_SIZE); + vbuf->buf = (void *)vbuf + sizeof(*vbuf); + vbuf->size = size; + + vbuf->resp_cb = resp_cb; + vbuf->resp_size = resp_size; + if (resp_size <= MAX_INLINE_RESP_SIZE) + vbuf->resp_buf = (void *)vbuf->buf + size; + else + vbuf->resp_buf = resp_buf; + BUG_ON(!vbuf->resp_buf); + return vbuf; +} + +static void *virtio_gpu_alloc_cmd(struct virtio_gpu_device *vgdev, + struct virtio_gpu_vbuffer **vbuffer_p, + int size) +{ + struct virtio_gpu_vbuffer *vbuf; + + vbuf = virtio_gpu_get_vbuf(vgdev, size, + sizeof(struct virtio_gpu_ctrl_hdr), + NULL, NULL); + if (IS_ERR(vbuf)) { + *vbuffer_p = NULL; + return ERR_CAST(vbuf); + } + *vbuffer_p = vbuf; + return vbuf->buf; +} + +static struct virtio_gpu_update_cursor* +virtio_gpu_alloc_cursor(struct virtio_gpu_device *vgdev, + struct virtio_gpu_vbuffer **vbuffer_p) +{ + 
struct virtio_gpu_vbuffer *vbuf; + + vbuf = virtio_gpu_get_vbuf + (vgdev, sizeof(struct virtio_gpu_update_cursor), + 0, NULL, NULL); + if (IS_ERR(vbuf)) { + *vbuffer_p = NULL; + return ERR_CAST(vbuf); + } + *vbuffer_p = vbuf; + return (struct virtio_gpu_update_cursor *)vbuf->buf; +} + +static void *virtio_gpu_alloc_cmd_resp(struct virtio_gpu_device *vgdev, + virtio_gpu_resp_cb cb, + struct virtio_gpu_vbuffer **vbuffer_p, + int cmd_size, int resp_size, + void *resp_buf) +{ + struct virtio_gpu_vbuffer *vbuf; + + vbuf = virtio_gpu_get_vbuf(vgdev, cmd_size, + resp_size, resp_buf, cb); + if (IS_ERR(vbuf)) { + *vbuffer_p = NULL; + return ERR_CAST(vbuf); + } + *vbuffer_p = vbuf; + return (struct virtio_gpu_command *)vbuf->buf; +} + +static void free_vbuf(struct virtio_gpu_device *vgdev, + struct virtio_gpu_vbuffer *vbuf) +{ + if (vbuf->resp_size > MAX_INLINE_RESP_SIZE) + kfree(vbuf->resp_buf); + kfree(vbuf->data_buf); + list_add(&vbuf->list, &vgdev->free_vbufs); +} + +static void reclaim_vbufs(struct virtqueue *vq, struct list_head *reclaim_list) +{ + struct virtio_gpu_vbuffer *vbuf; + unsigned int len; + int freed = 0; + + while ((vbuf = virtqueue_get_buf(vq, &len))) { + list_add_tail(&vbuf->list, reclaim_list); + freed++; + } + if (freed == 0) + DRM_DEBUG("Huh? zero vbufs reclaimed"); +} + +void virtio_gpu_dequeue_ctrl_func(struct work_struct *work) +{ + struct virtio_gpu_device *vgdev = + container_of(work, struct virtio_gpu_device, + ctrlq.dequeue_work); + struct list_head reclaim_list; + struct virtio_gpu_vbuffer *entry, *tmp; + struct virtio_gpu_ctrl_hdr *resp; + u64 fence_id = 0; + + INIT_LIST_HEAD(&reclaim_list); + spin_lock(&vgdev->ctrlq.qlock); + do { + virtqueue_disable_cb(vgdev->ctrlq.vq); + reclaim_vbufs(vgdev->ctrlq.vq, &reclaim_list); + + } while (!virtqueue_enable_cb(vgdev->ctrlq.vq)); + spin_unlock(&vgdev->ctrlq.qlock); + + list_for_each_entry_safe(entry, tmp, &reclaim_list, list) { + resp = (struct virtio_gpu_ctrl_hdr *)entry->resp_buf; + if (resp->type != cpu_to_le32(VIRTIO_GPU_RESP_OK_NODATA)) + DRM_DEBUG("response 0x%x\n", le32_to_cpu(resp->type)); + if (resp->flags & cpu_to_le32(VIRTIO_GPU_FLAG_FENCE)) { + u64 f = le64_to_cpu(resp->fence_id); + + if (fence_id > f) { + DRM_ERROR("%s: Oops: fence %llx -> %llx\n", + __func__, fence_id, f); + } else { + fence_id = f; + } + } + if (entry->resp_cb) + entry->resp_cb(vgdev, entry); + + list_del(&entry->list); + free_vbuf(vgdev, entry); + } + wake_up(&vgdev->ctrlq.ack_queue); + + if (fence_id) + virtio_gpu_fence_event_process(vgdev, fence_id); +} + +void virtio_gpu_dequeue_cursor_func(struct work_struct *work) +{ + struct virtio_gpu_device *vgdev = + container_of(work, struct virtio_gpu_device, + cursorq.dequeue_work); + struct list_head reclaim_list; + struct virtio_gpu_vbuffer *entry, *tmp; + + INIT_LIST_HEAD(&reclaim_list); + spin_lock(&vgdev->cursorq.qlock); + do { + virtqueue_disable_cb(vgdev->cursorq.vq); + reclaim_vbufs(vgdev->cursorq.vq, &reclaim_list); + } while (!virtqueue_enable_cb(vgdev->cursorq.vq)); + spin_unlock(&vgdev->cursorq.qlock); + + list_for_each_entry_safe(entry, tmp, &reclaim_list, list) { + list_del(&entry->list); + free_vbuf(vgdev, entry); + } + wake_up(&vgdev->cursorq.ack_queue); +} + +static int virtio_gpu_queue_ctrl_buffer(struct virtio_gpu_device *vgdev, + struct virtio_gpu_vbuffer *vbuf) +{ + struct virtqueue *vq = vgdev->ctrlq.vq; + struct scatterlist *sgs[3], vcmd, vout, vresp; + int outcnt = 0, incnt = 0; + int ret; + + if (!vgdev->vqs_ready) + return -ENODEV; + + sg_init_one(&vcmd, vbuf->buf, 
vbuf->size); + sgs[outcnt+incnt] = &vcmd; + outcnt++; + + if (vbuf->data_size) { + sg_init_one(&vout, vbuf->data_buf, vbuf->data_size); + sgs[outcnt + incnt] = &vout; + outcnt++; + } + + if (vbuf->resp_size) { + sg_init_one(&vresp, vbuf->resp_buf, vbuf->resp_size); + sgs[outcnt + incnt] = &vresp; + incnt++; + } + + spin_lock(&vgdev->ctrlq.qlock); +retry: + ret = virtqueue_add_sgs(vq, sgs, outcnt, incnt, vbuf, GFP_ATOMIC); + if (ret == -ENOSPC) { + spin_unlock(&vgdev->ctrlq.qlock); + wait_event(vgdev->ctrlq.ack_queue, vq->num_free); + spin_lock(&vgdev->ctrlq.qlock); + goto retry; + } else { + virtqueue_kick(vq); + } + spin_unlock(&vgdev->ctrlq.qlock); + + if (!ret) + ret = vq->num_free; + return ret; +} + +static int virtio_gpu_queue_cursor(struct virtio_gpu_device *vgdev, + struct virtio_gpu_vbuffer *vbuf) +{ + struct virtqueue *vq = vgdev->cursorq.vq; + struct scatterlist *sgs[1], ccmd; + int ret; + int outcnt; + + if (!vgdev->vqs_ready) + return -ENODEV; + + sg_init_one(&ccmd, vbuf->buf, vbuf->size); + sgs[0] = &ccmd; + outcnt = 1; + + spin_lock(&vgdev->cursorq.qlock); +retry: + ret = virtqueue_add_sgs(vq, sgs, outcnt, 0, vbuf, GFP_ATOMIC); + if (ret == -ENOSPC) { + spin_unlock(&vgdev->cursorq.qlock); + wait_event(vgdev->cursorq.ack_queue, vq->num_free); + spin_lock(&vgdev->cursorq.qlock); + goto retry; + } else { + virtqueue_kick(vq); + } + + spin_unlock(&vgdev->cursorq.qlock); + + if (!ret) + ret = vq->num_free; + return ret; +} + +/* just create gem objects for userspace and long lived objects, + just use dma_alloced pages for the queue objects? */ + +/* create a basic resource */ +void virtio_gpu_cmd_create_resource(struct virtio_gpu_device *vgdev, + uint32_t resource_id, + uint32_t format, + uint32_t width, + uint32_t height) +{ + struct virtio_gpu_resource_create_2d *cmd_p; + struct virtio_gpu_vbuffer *vbuf; + + cmd_p = virtio_gpu_alloc_cmd(vgdev, &vbuf, sizeof(*cmd_p)); + memset(cmd_p, 0, sizeof(*cmd_p)); + + cmd_p->hdr.type = cpu_to_le32(VIRTIO_GPU_CMD_RESOURCE_CREATE_2D); + cmd_p->resource_id = cpu_to_le32(resource_id); + cmd_p->format = cpu_to_le32(format); + cmd_p->width = cpu_to_le32(width); + cmd_p->height = cpu_to_le32(height); + + virtio_gpu_queue_ctrl_buffer(vgdev, vbuf); +} + +void virtio_gpu_cmd_unref_resource(struct virtio_gpu_device *vgdev, + uint32_t resource_id) +{ + struct virtio_gpu_resource_unref *cmd_p; + struct virtio_gpu_vbuffer *vbuf; + + cmd_p = virtio_gpu_alloc_cmd(vgdev, &vbuf, sizeof(*cmd_p)); + memset(cmd_p, 0, sizeof(*cmd_p)); + + cmd_p->hdr.type = cpu_to_le32(VIRTIO_GPU_CMD_RESOURCE_UNREF); + cmd_p->resource_id = cpu_to_le32(resource_id); + + virtio_gpu_queue_ctrl_buffer(vgdev, vbuf); +} + +void virtio_gpu_cmd_resource_inval_backing(struct virtio_gpu_device *vgdev, + uint32_t resource_id) +{ + struct virtio_gpu_resource_detach_backing *cmd_p; + struct virtio_gpu_vbuffer *vbuf; + + cmd_p = virtio_gpu_alloc_cmd(vgdev, &vbuf, sizeof(*cmd_p)); + memset(cmd_p, 0, sizeof(*cmd_p)); + + cmd_p->hdr.type = cpu_to_le32(VIRTIO_GPU_CMD_RESOURCE_DETACH_BACKING); + cmd_p->resource_id = cpu_to_le32(resource_id); + + virtio_gpu_queue_ctrl_buffer(vgdev, vbuf); +} + +void virtio_gpu_cmd_set_scanout(struct virtio_gpu_device *vgdev, + uint32_t scanout_id, uint32_t resource_id, + uint32_t width, uint32_t height, + uint32_t x, uint32_t y) +{ + struct virtio_gpu_set_scanout *cmd_p; + struct virtio_gpu_vbuffer *vbuf; + + cmd_p = virtio_gpu_alloc_cmd(vgdev, &vbuf, sizeof(*cmd_p)); + memset(cmd_p, 0, sizeof(*cmd_p)); + + cmd_p->hdr.type = cpu_to_le32(VIRTIO_GPU_CMD_SET_SCANOUT); 
+ cmd_p->resource_id = cpu_to_le32(resource_id); + cmd_p->scanout_id = cpu_to_le32(scanout_id); + cmd_p->r.width = cpu_to_le32(width); + cmd_p->r.height = cpu_to_le32(height); + cmd_p->r.x = cpu_to_le32(x); + cmd_p->r.y = cpu_to_le32(y); + + virtio_gpu_queue_ctrl_buffer(vgdev, vbuf); +} + +void virtio_gpu_cmd_resource_flush(struct virtio_gpu_device *vgdev, + uint32_t resource_id, + uint32_t x, uint32_t y, + uint32_t width, uint32_t height) +{ + struct virtio_gpu_resource_flush *cmd_p; + struct virtio_gpu_vbuffer *vbuf; + + cmd_p = virtio_gpu_alloc_cmd(vgdev, &vbuf, sizeof(*cmd_p)); + memset(cmd_p, 0, sizeof(*cmd_p)); + + cmd_p->hdr.type = cpu_to_le32(VIRTIO_GPU_CMD_RESOURCE_FLUSH); + cmd_p->resource_id = cpu_to_le32(resource_id); + cmd_p->r.width = cpu_to_le32(width); + cmd_p->r.height = cpu_to_le32(height); + cmd_p->r.x = cpu_to_le32(x); + cmd_p->r.y = cpu_to_le32(y); + + virtio_gpu_queue_ctrl_buffer(vgdev, vbuf); +} + +void virtio_gpu_cmd_transfer_to_host_2d(struct virtio_gpu_device *vgdev, + uint32_t resource_id, uint64_t offset, + __le32 width, __le32 height, + __le32 x, __le32 y, + struct virtio_gpu_fence **fence) +{ + struct virtio_gpu_transfer_to_host_2d *cmd_p; + struct virtio_gpu_vbuffer *vbuf; + + cmd_p = virtio_gpu_alloc_cmd(vgdev, &vbuf, sizeof(*cmd_p)); + memset(cmd_p, 0, sizeof(*cmd_p)); + + cmd_p->hdr.type = cpu_to_le32(VIRTIO_GPU_CMD_TRANSFER_TO_HOST_2D); + cmd_p->resource_id = cpu_to_le32(resource_id); + cmd_p->offset = cpu_to_le64(offset); + cmd_p->r.width = width; + cmd_p->r.height = height; + cmd_p->r.x = x; + cmd_p->r.y = y; + + if (fence) + virtio_gpu_fence_emit(vgdev, &cmd_p->hdr, fence); + virtio_gpu_queue_ctrl_buffer(vgdev, vbuf); +} + +static void +virtio_gpu_cmd_resource_attach_backing(struct virtio_gpu_device *vgdev, + uint32_t resource_id, + struct virtio_gpu_mem_entry *ents, + uint32_t nents, + struct virtio_gpu_fence **fence) +{ + struct virtio_gpu_resource_attach_backing *cmd_p; + struct virtio_gpu_vbuffer *vbuf; + + cmd_p = virtio_gpu_alloc_cmd(vgdev, &vbuf, sizeof(*cmd_p)); + memset(cmd_p, 0, sizeof(*cmd_p)); + + cmd_p->hdr.type = cpu_to_le32(VIRTIO_GPU_CMD_RESOURCE_ATTACH_BACKING); + cmd_p->resource_id = cpu_to_le32(resource_id); + cmd_p->nr_entries = cpu_to_le32(nents); + + vbuf->data_buf = ents; + vbuf->data_size = sizeof(*ents) * nents; + + if (fence) + virtio_gpu_fence_emit(vgdev, &cmd_p->hdr, fence); + virtio_gpu_queue_ctrl_buffer(vgdev, vbuf); +} + +static void virtio_gpu_cmd_get_display_info_cb(struct virtio_gpu_device *vgdev, + struct virtio_gpu_vbuffer *vbuf) +{ + struct virtio_gpu_resp_display_info *resp = + (struct virtio_gpu_resp_display_info *)vbuf->resp_buf; + int i; + + spin_lock(&vgdev->display_info_lock); + for (i = 0; i < vgdev->num_scanouts; i++) { + vgdev->outputs[i].info = resp->pmodes[i]; + if (resp->pmodes[i].enabled) { + DRM_DEBUG("output %d: %dx%d+%d+%d", i, + le32_to_cpu(resp->pmodes[i].r.width), + le32_to_cpu(resp->pmodes[i].r.height), + le32_to_cpu(resp->pmodes[i].r.x), + le32_to_cpu(resp->pmodes[i].r.y)); + } else { + DRM_DEBUG("output %d: disabled", i); + } + } + + spin_unlock(&vgdev->display_info_lock); + wake_up(&vgdev->resp_wq); + + if (!drm_helper_hpd_irq_event(vgdev->ddev)) + drm_kms_helper_hotplug_event(vgdev->ddev); +} + +int virtio_gpu_cmd_get_display_info(struct virtio_gpu_device *vgdev) +{ + struct virtio_gpu_ctrl_hdr *cmd_p; + struct virtio_gpu_vbuffer *vbuf; + void *resp_buf; + + resp_buf = kzalloc(sizeof(struct virtio_gpu_resp_display_info), + GFP_KERNEL); + if (!resp_buf) + return -ENOMEM; + + cmd_p = 
virtio_gpu_alloc_cmd_resp + (vgdev, &virtio_gpu_cmd_get_display_info_cb, &vbuf, + sizeof(*cmd_p), sizeof(struct virtio_gpu_resp_display_info), + resp_buf); + memset(cmd_p, 0, sizeof(*cmd_p)); + + cmd_p->type = cpu_to_le32(VIRTIO_GPU_CMD_GET_DISPLAY_INFO); + virtio_gpu_queue_ctrl_buffer(vgdev, vbuf); + return 0; +} + +int virtio_gpu_object_attach(struct virtio_gpu_device *vgdev, + struct virtio_gpu_object *obj, + uint32_t resource_id, + struct virtio_gpu_fence **fence) +{ + struct virtio_gpu_mem_entry *ents; + struct scatterlist *sg; + int si; + + if (!obj->pages) { + int ret; + ret = virtio_gpu_object_get_sg_table(vgdev, obj); + if (ret) + return ret; + } + + /* gets freed when the ring has consumed it */ + ents = kmalloc_array(obj->pages->nents, + sizeof(struct virtio_gpu_mem_entry), + GFP_KERNEL); + if (!ents) { + DRM_ERROR("failed to allocate ent list\n"); + return -ENOMEM; + } + + for_each_sg(obj->pages->sgl, sg, obj->pages->nents, si) { + ents[si].addr = cpu_to_le64(sg_phys(sg)); + ents[si].length = cpu_to_le32(sg->length); + ents[si].padding = 0; + } + + virtio_gpu_cmd_resource_attach_backing(vgdev, resource_id, + ents, obj->pages->nents, + fence); + obj->hw_res_handle = resource_id; + return 0; +} + +void virtio_gpu_cursor_ping(struct virtio_gpu_device *vgdev, + struct virtio_gpu_output *output) +{ + struct virtio_gpu_vbuffer *vbuf; + struct virtio_gpu_update_cursor *cur_p; + + output->cursor.pos.scanout_id = cpu_to_le32(output->index); + cur_p = virtio_gpu_alloc_cursor(vgdev, &vbuf); + memcpy(cur_p, &output->cursor, sizeof(output->cursor)); + virtio_gpu_queue_cursor(vgdev, vbuf); +} diff --git a/include/drm/drmP.h b/include/drm/drmP.h index df6d9970d9a4..59ce5587ed90 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -814,6 +814,7 @@ struct drm_device { #endif struct platform_device *platformdev; /**< Platform device struture */ + struct virtio_device *virtdev; struct drm_sg_mem *sg; /**< Scatter gather memory */ unsigned int num_crtcs; /**< Number of CRTCs on this device */ diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild index 1a0006a76b00..4460e5820b0e 100644 --- a/include/uapi/linux/Kbuild +++ b/include/uapi/linux/Kbuild @@ -430,6 +430,7 @@ header-y += virtio_balloon.h header-y += virtio_blk.h header-y += virtio_config.h header-y += virtio_console.h +header-y += virtio_gpu.h header-y += virtio_ids.h header-y += virtio_input.h header-y += virtio_net.h diff --git a/include/uapi/linux/virtio_gpu.h b/include/uapi/linux/virtio_gpu.h new file mode 100644 index 000000000000..571c4cbd3f93 --- /dev/null +++ b/include/uapi/linux/virtio_gpu.h @@ -0,0 +1,204 @@ +/* + * Virtio GPU Device + * + * Copyright Red Hat, Inc. 2013-2014 + * + * Authors: + * Dave Airlie + * Gerd Hoffmann + * + * This header is BSD licensed so anyone can use the definitions + * to implement compatible drivers/servers: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
Neither the name of IBM nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL IBM OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef VIRTIO_GPU_HW_H +#define VIRTIO_GPU_HW_H + +enum virtio_gpu_ctrl_type { + VIRTIO_GPU_UNDEFINED = 0, + + /* 2d commands */ + VIRTIO_GPU_CMD_GET_DISPLAY_INFO = 0x0100, + VIRTIO_GPU_CMD_RESOURCE_CREATE_2D, + VIRTIO_GPU_CMD_RESOURCE_UNREF, + VIRTIO_GPU_CMD_SET_SCANOUT, + VIRTIO_GPU_CMD_RESOURCE_FLUSH, + VIRTIO_GPU_CMD_TRANSFER_TO_HOST_2D, + VIRTIO_GPU_CMD_RESOURCE_ATTACH_BACKING, + VIRTIO_GPU_CMD_RESOURCE_DETACH_BACKING, + + /* cursor commands */ + VIRTIO_GPU_CMD_UPDATE_CURSOR = 0x0300, + VIRTIO_GPU_CMD_MOVE_CURSOR, + + /* success responses */ + VIRTIO_GPU_RESP_OK_NODATA = 0x1100, + VIRTIO_GPU_RESP_OK_DISPLAY_INFO, + + /* error responses */ + VIRTIO_GPU_RESP_ERR_UNSPEC = 0x1200, + VIRTIO_GPU_RESP_ERR_OUT_OF_MEMORY, + VIRTIO_GPU_RESP_ERR_INVALID_SCANOUT_ID, + VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID, + VIRTIO_GPU_RESP_ERR_INVALID_CONTEXT_ID, + VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER, +}; + +#define VIRTIO_GPU_FLAG_FENCE (1 << 0) + +struct virtio_gpu_ctrl_hdr { + __le32 type; + __le32 flags; + __le64 fence_id; + __le32 ctx_id; + __le32 padding; +}; + +/* data passed in the cursor vq */ + +struct virtio_gpu_cursor_pos { + __le32 scanout_id; + __le32 x; + __le32 y; + __le32 padding; +}; + +/* VIRTIO_GPU_CMD_UPDATE_CURSOR, VIRTIO_GPU_CMD_MOVE_CURSOR */ +struct virtio_gpu_update_cursor { + struct virtio_gpu_ctrl_hdr hdr; + struct virtio_gpu_cursor_pos pos; /* update & move */ + __le32 resource_id; /* update only */ + __le32 hot_x; /* update only */ + __le32 hot_y; /* update only */ + __le32 padding; +}; + +/* data passed in the control vq, 2d related */ + +struct virtio_gpu_rect { + __le32 x; + __le32 y; + __le32 width; + __le32 height; +}; + +/* VIRTIO_GPU_CMD_RESOURCE_UNREF */ +struct virtio_gpu_resource_unref { + struct virtio_gpu_ctrl_hdr hdr; + __le32 resource_id; + __le32 padding; +}; + +/* VIRTIO_GPU_CMD_RESOURCE_CREATE_2D: create a 2d resource with a format */ +struct virtio_gpu_resource_create_2d { + struct virtio_gpu_ctrl_hdr hdr; + __le32 resource_id; + __le32 format; + __le32 width; + __le32 height; +}; + +/* VIRTIO_GPU_CMD_SET_SCANOUT */ +struct virtio_gpu_set_scanout { + struct virtio_gpu_ctrl_hdr hdr; + struct virtio_gpu_rect r; + __le32 scanout_id; + __le32 resource_id; +}; + +/* VIRTIO_GPU_CMD_RESOURCE_FLUSH */ +struct virtio_gpu_resource_flush { + struct virtio_gpu_ctrl_hdr hdr; + struct virtio_gpu_rect r; + __le32 resource_id; + __le32 padding; +}; + +/* VIRTIO_GPU_CMD_TRANSFER_TO_HOST_2D: simple transfer to_host */ +struct virtio_gpu_transfer_to_host_2d { + struct virtio_gpu_ctrl_hdr hdr; + struct virtio_gpu_rect r; + 
__le64 offset; + __le32 resource_id; + __le32 padding; +}; + +struct virtio_gpu_mem_entry { + __le64 addr; + __le32 length; + __le32 padding; +}; + +/* VIRTIO_GPU_CMD_RESOURCE_ATTACH_BACKING */ +struct virtio_gpu_resource_attach_backing { + struct virtio_gpu_ctrl_hdr hdr; + __le32 resource_id; + __le32 nr_entries; +}; + +/* VIRTIO_GPU_CMD_RESOURCE_DETACH_BACKING */ +struct virtio_gpu_resource_detach_backing { + struct virtio_gpu_ctrl_hdr hdr; + __le32 resource_id; + __le32 padding; +}; + +/* VIRTIO_GPU_RESP_OK_DISPLAY_INFO */ +#define VIRTIO_GPU_MAX_SCANOUTS 16 +struct virtio_gpu_resp_display_info { + struct virtio_gpu_ctrl_hdr hdr; + struct virtio_gpu_display_one { + struct virtio_gpu_rect r; + __le32 enabled; + __le32 flags; + } pmodes[VIRTIO_GPU_MAX_SCANOUTS]; +}; + +#define VIRTIO_GPU_EVENT_DISPLAY (1 << 0) + +struct virtio_gpu_config { + __u32 events_read; + __u32 events_clear; + __u32 num_scanouts; + __u32 reserved; +}; + +/* simple formats for fbcon/X use */ +enum virtio_gpu_formats { + VIRTIO_GPU_FORMAT_B8G8R8A8_UNORM = 1, + VIRTIO_GPU_FORMAT_B8G8R8X8_UNORM = 2, + VIRTIO_GPU_FORMAT_A8R8G8B8_UNORM = 3, + VIRTIO_GPU_FORMAT_X8R8G8B8_UNORM = 4, + + VIRTIO_GPU_FORMAT_R8G8B8A8_UNORM = 67, + VIRTIO_GPU_FORMAT_X8B8G8R8_UNORM = 68, + + VIRTIO_GPU_FORMAT_A8B8G8R8_UNORM = 121, + VIRTIO_GPU_FORMAT_R8G8B8X8_UNORM = 134, +}; + +#endif diff --git a/include/uapi/linux/virtio_ids.h b/include/uapi/linux/virtio_ids.h index 5f60aa4be50a..77925f587b15 100644 --- a/include/uapi/linux/virtio_ids.h +++ b/include/uapi/linux/virtio_ids.h @@ -39,6 +39,7 @@ #define VIRTIO_ID_9P 9 /* 9p virtio console */ #define VIRTIO_ID_RPROC_SERIAL 11 /* virtio remoteproc serial link */ #define VIRTIO_ID_CAIF 12 /* Virtio caif */ +#define VIRTIO_ID_GPU 16 /* virtio GPU */ #define VIRTIO_ID_INPUT 18 /* virtio input */ #endif /* _LINUX_VIRTIO_IDS_H */ -- cgit v1.2.3 From 3896d655f4d491c67d669a15f275a39f713410f8 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 2 Jun 2015 16:03:14 -0700 Subject: bpf: introduce bpf_clone_redirect() helper Allow eBPF programs attached to classifier/actions to call bpf_clone_redirect(skb, ifindex, flags) helper which will mirror or redirect the packet by dynamic ifindex selection from within the program to a target device either at ingress or at egress. Can be used for various scenarios, for example, to load balance skbs into veths, split parts of the traffic to local taps, etc. Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. 
Miller --- include/uapi/linux/bpf.h | 10 ++++++++++ net/core/filter.c | 40 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 50 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 72f3080afa1e..42aa19abab86 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -220,6 +220,16 @@ enum bpf_func_id { * Return: 0 on success */ BPF_FUNC_tail_call, + + /** + * bpf_clone_redirect(skb, ifindex, flags) - redirect to another netdev + * @skb: pointer to skb + * @ifindex: ifindex of the net device + * @flags: bit 0 - if set, redirect to ingress instead of egress + * other bits - reserved + * Return: 0 on success + */ + BPF_FUNC_clone_redirect, __BPF_FUNC_MAX_ID, }; diff --git a/net/core/filter.c b/net/core/filter.c index b78a010a957f..64c121c09655 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -46,6 +46,7 @@ #include #include #include +#include /** * sk_filter - run a packet through a socket filter @@ -1407,6 +1408,43 @@ const struct bpf_func_proto bpf_l4_csum_replace_proto = { .arg5_type = ARG_ANYTHING, }; +#define BPF_IS_REDIRECT_INGRESS(flags) ((flags) & 1) + +static u64 bpf_clone_redirect(u64 r1, u64 ifindex, u64 flags, u64 r4, u64 r5) +{ + struct sk_buff *skb = (struct sk_buff *) (long) r1, *skb2; + struct net_device *dev; + + dev = dev_get_by_index_rcu(dev_net(skb->dev), ifindex); + if (unlikely(!dev)) + return -EINVAL; + + if (unlikely(!(dev->flags & IFF_UP))) + return -EINVAL; + + skb2 = skb_clone(skb, GFP_ATOMIC); + if (unlikely(!skb2)) + return -ENOMEM; + + if (G_TC_AT(skb2->tc_verd) & AT_INGRESS) + skb_push(skb2, skb2->mac_len); + + if (BPF_IS_REDIRECT_INGRESS(flags)) + return dev_forward_skb(dev, skb2); + + skb2->dev = dev; + return dev_queue_xmit(skb2); +} + +const struct bpf_func_proto bpf_clone_redirect_proto = { + .func = bpf_clone_redirect, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, + .arg3_type = ARG_ANYTHING, +}; + static const struct bpf_func_proto * sk_filter_func_proto(enum bpf_func_id func_id) { @@ -1440,6 +1478,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id) return &bpf_l3_csum_replace_proto; case BPF_FUNC_l4_csum_replace: return &bpf_l4_csum_replace_proto; + case BPF_FUNC_clone_redirect: + return &bpf_clone_redirect_proto; default: return sk_filter_func_proto(func_id); } -- cgit v1.2.3 From f077825a8758d79838a757dafb79adcdd047ef3a Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 1 Apr 2015 15:06:40 +0200 Subject: KVM: x86: API changes for SMM support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch includes changes to the external API for SMM support. Userspace can predicate the availability of the new fields and ioctls on a new capability, KVM_CAP_X86_SMM, which is added at the end of the patch series. 
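As a rough illustration only (not part of this patch), userspace could probe the new capability and queue an SMI along these lines; kvm_fd and vcpu_fd are assumed to come from the usual open("/dev/kvm") / KVM_CREATE_VM / KVM_CREATE_VCPU sequence, which is omitted here:

  #include <linux/kvm.h>
  #include <sys/ioctl.h>

  /* Sketch: check for the SMM API, then queue an SMI on one vcpu. */
  static int try_inject_smi(int kvm_fd, int vcpu_fd)
  {
          if (ioctl(kvm_fd, KVM_CHECK_EXTENSION, KVM_CAP_X86_SMM) <= 0)
                  return -1;              /* SMM API not available */

          return ioctl(vcpu_fd, KVM_SMI); /* vcpu ioctl, no argument */
  }

After KVM_RUN returns, kvm_run->flags & KVM_RUN_X86_SMM tells userspace whether the vcpu is currently in system management mode, and the new smi sub-struct of kvm_vcpu_events is transferred by KVM_GET/SET_VCPU_EVENTS when KVM_VCPUEVENT_VALID_SMM is set.
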
Reviewed-by: Radim Krčmář Signed-off-by: Paolo Bonzini --- Documentation/virtual/kvm/api.txt | 40 +++++++++++++++++++++++++++++++++------ arch/x86/include/asm/kvm_host.h | 3 +++ arch/x86/include/uapi/asm/kvm.h | 11 ++++++++++- arch/x86/kvm/kvm_cache_regs.h | 5 +++++ arch/x86/kvm/lapic.h | 5 +++++ arch/x86/kvm/x86.c | 40 +++++++++++++++++++++++++++++++++++++-- include/uapi/linux/kvm.h | 5 ++++- 7 files changed, 99 insertions(+), 10 deletions(-) (limited to 'include/uapi/linux') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 695544420ff2..2ddefd58b1aa 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -820,11 +820,21 @@ struct kvm_vcpu_events { } nmi; __u32 sipi_vector; __u32 flags; + struct { + __u8 smm; + __u8 pending; + __u8 smm_inside_nmi; + __u8 latched_init; + } smi; }; -KVM_VCPUEVENT_VALID_SHADOW may be set in the flags field to signal that -interrupt.shadow contains a valid state. Otherwise, this field is undefined. +Only two fields are defined in the flags field: + +- KVM_VCPUEVENT_VALID_SHADOW may be set in the flags field to signal that + interrupt.shadow contains a valid state. +- KVM_VCPUEVENT_VALID_SMM may be set in the flags field to signal that + smi contains a valid state. 4.32 KVM_SET_VCPU_EVENTS @@ -841,17 +851,20 @@ vcpu. See KVM_GET_VCPU_EVENTS for the data structure. Fields that may be modified asynchronously by running VCPUs can be excluded -from the update. These fields are nmi.pending and sipi_vector. Keep the -corresponding bits in the flags field cleared to suppress overwriting the -current in-kernel state. The bits are: +from the update. These fields are nmi.pending, sipi_vector, smi.smm, +smi.pending. Keep the corresponding bits in the flags field cleared to +suppress overwriting the current in-kernel state. The bits are: KVM_VCPUEVENT_VALID_NMI_PENDING - transfer nmi.pending to the kernel KVM_VCPUEVENT_VALID_SIPI_VECTOR - transfer sipi_vector +KVM_VCPUEVENT_VALID_SMM - transfer the smi sub-struct. If KVM_CAP_INTR_SHADOW is available, KVM_VCPUEVENT_VALID_SHADOW can be set in the flags field to signal that interrupt.shadow contains a valid state and shall be written into the VCPU. +KVM_VCPUEVENT_VALID_SMM can only be set if KVM_CAP_X86_SMM is available. + 4.33 KVM_GET_DEBUGREGS @@ -2979,6 +2992,16 @@ len must be a multiple of sizeof(struct kvm_s390_irq). It must be > 0 and it must not exceed (max_vcpus + 32) * sizeof(struct kvm_s390_irq), which is the maximum number of possibly pending cpu-local interrupts. +4.90 KVM_SMI + +Capability: KVM_CAP_X86_SMM +Architectures: x86 +Type: vcpu ioctl +Parameters: none +Returns: 0 on success, -1 on error + +Queues an SMI on the thread's vcpu. + 5. The kvm_run structure ------------------------ @@ -3014,7 +3037,12 @@ an interrupt can be injected now with KVM_INTERRUPT. The value of the current interrupt flag. Only valid if in-kernel local APIC is not used. - __u8 padding2[2]; + __u16 flags; + +More architecture-specific flags detailing state of the VCPU that may +affect the device's behavior. The only currently defined flag is +KVM_RUN_X86_SMM, which is valid on x86 machines and is set if the +VCPU is in system management mode. 
/* in (pre_kvm_run), out (post_kvm_run) */ __u64 cr8; diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 4e299fcd0eb6..d52d7aea375f 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -471,6 +471,7 @@ struct kvm_vcpu_arch { atomic_t nmi_queued; /* unprocessed asynchronous NMIs */ unsigned nmi_pending; /* NMI queued after currently running handler */ bool nmi_injected; /* Trying to inject an NMI this entry */ + bool smi_pending; /* SMI queued after currently running handler */ struct mtrr_state_type mtrr_state; u64 pat; @@ -1115,6 +1116,8 @@ enum { #define HF_NMI_MASK (1 << 3) #define HF_IRET_MASK (1 << 4) #define HF_GUEST_MASK (1 << 5) /* VCPU is in guest-mode */ +#define HF_SMM_MASK (1 << 6) +#define HF_SMM_INSIDE_NMI_MASK (1 << 7) /* * Hardware virtualization extension instructions may fault if a diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h index 2fec75e4b1e1..a4ae82eb82aa 100644 --- a/arch/x86/include/uapi/asm/kvm.h +++ b/arch/x86/include/uapi/asm/kvm.h @@ -106,6 +106,8 @@ struct kvm_ioapic_state { #define KVM_IRQCHIP_IOAPIC 2 #define KVM_NR_IRQCHIPS 3 +#define KVM_RUN_X86_SMM (1 << 0) + /* for KVM_GET_REGS and KVM_SET_REGS */ struct kvm_regs { /* out (KVM_GET_REGS) / in (KVM_SET_REGS) */ @@ -281,6 +283,7 @@ struct kvm_reinject_control { #define KVM_VCPUEVENT_VALID_NMI_PENDING 0x00000001 #define KVM_VCPUEVENT_VALID_SIPI_VECTOR 0x00000002 #define KVM_VCPUEVENT_VALID_SHADOW 0x00000004 +#define KVM_VCPUEVENT_VALID_SMM 0x00000008 /* Interrupt shadow states */ #define KVM_X86_SHADOW_INT_MOV_SS 0x01 @@ -309,7 +312,13 @@ struct kvm_vcpu_events { } nmi; __u32 sipi_vector; __u32 flags; - __u32 reserved[10]; + struct { + __u8 smm; + __u8 pending; + __u8 smm_inside_nmi; + __u8 latched_init; + } smi; + __u32 reserved[9]; }; /* for KVM_GET/SET_DEBUGREGS */ diff --git a/arch/x86/kvm/kvm_cache_regs.h b/arch/x86/kvm/kvm_cache_regs.h index 544076c4f44b..e1e89ee4af75 100644 --- a/arch/x86/kvm/kvm_cache_regs.h +++ b/arch/x86/kvm/kvm_cache_regs.h @@ -99,4 +99,9 @@ static inline bool is_guest_mode(struct kvm_vcpu *vcpu) return vcpu->arch.hflags & HF_GUEST_MASK; } +static inline bool is_smm(struct kvm_vcpu *vcpu) +{ + return vcpu->arch.hflags & HF_SMM_MASK; +} + #endif diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index 9d8fcde52027..f2f4e10ab772 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h @@ -159,6 +159,11 @@ static inline bool kvm_lowest_prio_delivery(struct kvm_lapic_irq *irq) irq->msi_redir_hint); } +static inline int kvm_lapic_latched_init(struct kvm_vcpu *vcpu) +{ + return kvm_vcpu_has_lapic(vcpu) && test_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events); +} + bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector); void wait_lapic_expire(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 7aec25f2f45c..aa46ac1ff48b 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3101,6 +3101,11 @@ static int kvm_vcpu_ioctl_nmi(struct kvm_vcpu *vcpu) return 0; } +static int kvm_vcpu_ioctl_smi(struct kvm_vcpu *vcpu) +{ + return 0; +} + static int vcpu_ioctl_tpr_access_reporting(struct kvm_vcpu *vcpu, struct kvm_tpr_access_ctl *tac) { @@ -3206,8 +3211,15 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu, events->sipi_vector = 0; /* never valid when reporting to user space */ + events->smi.smm = is_smm(vcpu); + events->smi.pending = vcpu->arch.smi_pending; + events->smi.smm_inside_nmi = + !!(vcpu->arch.hflags & 
HF_SMM_INSIDE_NMI_MASK); + events->smi.latched_init = kvm_lapic_latched_init(vcpu); + events->flags = (KVM_VCPUEVENT_VALID_NMI_PENDING - | KVM_VCPUEVENT_VALID_SHADOW); + | KVM_VCPUEVENT_VALID_SHADOW + | KVM_VCPUEVENT_VALID_SMM); memset(&events->reserved, 0, sizeof(events->reserved)); } @@ -3216,7 +3228,8 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, { if (events->flags & ~(KVM_VCPUEVENT_VALID_NMI_PENDING | KVM_VCPUEVENT_VALID_SIPI_VECTOR - | KVM_VCPUEVENT_VALID_SHADOW)) + | KVM_VCPUEVENT_VALID_SHADOW + | KVM_VCPUEVENT_VALID_SMM)) return -EINVAL; process_nmi(vcpu); @@ -3241,6 +3254,24 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, kvm_vcpu_has_lapic(vcpu)) vcpu->arch.apic->sipi_vector = events->sipi_vector; + if (events->flags & KVM_VCPUEVENT_VALID_SMM) { + if (events->smi.smm) + vcpu->arch.hflags |= HF_SMM_MASK; + else + vcpu->arch.hflags &= ~HF_SMM_MASK; + vcpu->arch.smi_pending = events->smi.pending; + if (events->smi.smm_inside_nmi) + vcpu->arch.hflags |= HF_SMM_INSIDE_NMI_MASK; + else + vcpu->arch.hflags &= ~HF_SMM_INSIDE_NMI_MASK; + if (kvm_vcpu_has_lapic(vcpu)) { + if (events->smi.latched_init) + set_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events); + else + clear_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events); + } + } + kvm_make_request(KVM_REQ_EVENT, vcpu); return 0; @@ -3500,6 +3531,10 @@ long kvm_arch_vcpu_ioctl(struct file *filp, r = kvm_vcpu_ioctl_nmi(vcpu); break; } + case KVM_SMI: { + r = kvm_vcpu_ioctl_smi(vcpu); + break; + } case KVM_SET_CPUID: { struct kvm_cpuid __user *cpuid_arg = argp; struct kvm_cpuid cpuid; @@ -6182,6 +6217,7 @@ static void post_kvm_run_save(struct kvm_vcpu *vcpu) struct kvm_run *kvm_run = vcpu->run; kvm_run->if_flag = (kvm_get_rflags(vcpu) & X86_EFLAGS_IF) != 0; + kvm_run->flags = is_smm(vcpu) ? KVM_RUN_X86_SMM : 0; kvm_run->cr8 = kvm_get_cr8(vcpu); kvm_run->apic_base = kvm_get_apic_base(vcpu); if (irqchip_in_kernel(vcpu->kvm)) diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 75bd9f7fd846..eace8babd227 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -202,7 +202,7 @@ struct kvm_run { __u32 exit_reason; __u8 ready_for_interrupt_injection; __u8 if_flag; - __u8 padding2[2]; + __u16 flags; /* in (pre_kvm_run), out (post_kvm_run) */ __u64 cr8; @@ -815,6 +815,7 @@ struct kvm_ppc_smmu_info { #define KVM_CAP_S390_IRQ_STATE 114 #define KVM_CAP_PPC_HWRNG 115 #define KVM_CAP_DISABLE_QUIRKS 116 +#define KVM_CAP_X86_SMM 117 #ifdef KVM_CAP_IRQ_ROUTING @@ -1200,6 +1201,8 @@ struct kvm_s390_ucas_mapping { /* Available with KVM_CAP_S390_IRQ_STATE */ #define KVM_S390_SET_IRQ_STATE _IOW(KVMIO, 0xb5, struct kvm_s390_irq_state) #define KVM_S390_GET_IRQ_STATE _IOW(KVMIO, 0xb6, struct kvm_s390_irq_state) +/* Available with KVM_CAP_X86_SMM */ +#define KVM_SMI _IO(KVMIO, 0xb7) #define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) #define KVM_DEV_ASSIGN_PCI_2_3 (1 << 1) -- cgit v1.2.3 From 730fc4371333636a00fed32c587fc1e85c5367e2 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Thu, 4 Jun 2015 09:16:37 -0700 Subject: mpls: Add definition for IPPROTO_MPLS Add uapi define for MPLS over IP. Acked-by: Jiri Pirko Signed-off-by: Tom Herbert Signed-off-by: David S. 
Miller --- include/uapi/linux/in.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/in.h b/include/uapi/linux/in.h index 589ced069e8a..641338bef651 100644 --- a/include/uapi/linux/in.h +++ b/include/uapi/linux/in.h @@ -69,6 +69,8 @@ enum { #define IPPROTO_SCTP IPPROTO_SCTP IPPROTO_UDPLITE = 136, /* UDP-Lite (RFC 3828) */ #define IPPROTO_UDPLITE IPPROTO_UDPLITE + IPPROTO_MPLS = 137, /* MPLS in IP (RFC 4023) */ +#define IPPROTO_MPLS IPPROTO_MPLS IPPROTO_RAW = 255, /* Raw IP packets */ #define IPPROTO_RAW IPPROTO_RAW IPPROTO_MAX -- cgit v1.2.3 From ae45577324d1f749c907840247d443696ac3bc7a Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Fri, 5 Jun 2015 12:31:12 +1000 Subject: virtgpu: include linux/types.h to avoid warning. Signed-off-by: Dave Airlie --- include/uapi/linux/virtio_gpu.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/virtio_gpu.h b/include/uapi/linux/virtio_gpu.h index 571c4cbd3f93..478be5270e26 100644 --- a/include/uapi/linux/virtio_gpu.h +++ b/include/uapi/linux/virtio_gpu.h @@ -38,6 +38,8 @@ #ifndef VIRTIO_GPU_HW_H #define VIRTIO_GPU_HW_H +#include + enum virtio_gpu_ctrl_type { VIRTIO_GPU_UNDEFINED = 0, -- cgit v1.2.3 From 74fdcb2ee1786a92584e89c91006e449813527ce Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Tue, 28 Apr 2015 08:49:09 -0300 Subject: [media] videodev2.h: add support for transfer functions In the past the transfer function was implied by the colorspace. However, it is an independent entity in its own right. Add support for explicitly choosing the transfer function. This change will allow us to represent linear RGB (as is used by openGL), and it will make it easier to work with decoded video material since most codecs store the transfer function as a separate property as well. 
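A minimal, hypothetical usage sketch (not part of this patch): an application that wants linear RGB can now say so explicitly instead of relying on the colorspace default. The resolution, pixel format and fd below are placeholders:

  #include <linux/videodev2.h>
  #include <sys/ioctl.h>

  /* Sketch: request linear RGB on an already-opened capture device. */
  static int set_linear_rgb(int fd)
  {
          struct v4l2_format fmt = { .type = V4L2_BUF_TYPE_VIDEO_CAPTURE };

          fmt.fmt.pix.width       = 1920;
          fmt.fmt.pix.height      = 1080;
          fmt.fmt.pix.pixelformat = V4L2_PIX_FMT_RGB32;
          fmt.fmt.pix.field       = V4L2_FIELD_NONE;
          fmt.fmt.pix.colorspace  = V4L2_COLORSPACE_SRGB;
          fmt.fmt.pix.xfer_func   = V4L2_XFER_FUNC_NONE; /* linear, e.g. for openGL */

          return ioctl(fd, VIDIOC_S_FMT, &fmt); /* driver may adjust the result */
  }
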
Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- drivers/media/v4l2-core/v4l2-ioctl.c | 9 ++++---- include/media/v4l2-mediabus.h | 2 ++ include/uapi/linux/v4l2-mediabus.h | 4 +++- include/uapi/linux/videodev2.h | 41 +++++++++++++++++++++++++++++++++++- 4 files changed, 50 insertions(+), 6 deletions(-) (limited to 'include/uapi/linux') diff --git a/drivers/media/v4l2-core/v4l2-ioctl.c b/drivers/media/v4l2-core/v4l2-ioctl.c index 368bc3a973d4..691295984225 100644 --- a/drivers/media/v4l2-core/v4l2-ioctl.c +++ b/drivers/media/v4l2-core/v4l2-ioctl.c @@ -257,7 +257,8 @@ static void v4l_print_format(const void *arg, bool write_only) pr_cont(", width=%u, height=%u, " "pixelformat=%c%c%c%c, field=%s, " "bytesperline=%u, sizeimage=%u, colorspace=%d, " - "flags=0x%x, ycbcr_enc=%u, quantization=%u\n", + "flags=0x%x, ycbcr_enc=%u, quantization=%u, " + "xfer_func=%u\n", pix->width, pix->height, (pix->pixelformat & 0xff), (pix->pixelformat >> 8) & 0xff, @@ -266,7 +267,7 @@ static void v4l_print_format(const void *arg, bool write_only) prt_names(pix->field, v4l2_field_names), pix->bytesperline, pix->sizeimage, pix->colorspace, pix->flags, pix->ycbcr_enc, - pix->quantization); + pix->quantization, pix->xfer_func); break; case V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE: case V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE: @@ -274,7 +275,7 @@ static void v4l_print_format(const void *arg, bool write_only) pr_cont(", width=%u, height=%u, " "format=%c%c%c%c, field=%s, " "colorspace=%d, num_planes=%u, flags=0x%x, " - "ycbcr_enc=%u, quantization=%u\n", + "ycbcr_enc=%u, quantization=%u, xfer_func=%u\n", mp->width, mp->height, (mp->pixelformat & 0xff), (mp->pixelformat >> 8) & 0xff, @@ -282,7 +283,7 @@ static void v4l_print_format(const void *arg, bool write_only) (mp->pixelformat >> 24) & 0xff, prt_names(mp->field, v4l2_field_names), mp->colorspace, mp->num_planes, mp->flags, - mp->ycbcr_enc, mp->quantization); + mp->ycbcr_enc, mp->quantization, mp->xfer_func); for (i = 0; i < mp->num_planes; i++) printk(KERN_DEBUG "plane %u: bytesperline=%u sizeimage=%u\n", i, mp->plane_fmt[i].bytesperline, diff --git a/include/media/v4l2-mediabus.h b/include/media/v4l2-mediabus.h index 38d960d8dccd..73069e4c2796 100644 --- a/include/media/v4l2-mediabus.h +++ b/include/media/v4l2-mediabus.h @@ -96,6 +96,7 @@ static inline void v4l2_fill_pix_format(struct v4l2_pix_format *pix_fmt, pix_fmt->colorspace = mbus_fmt->colorspace; pix_fmt->ycbcr_enc = mbus_fmt->ycbcr_enc; pix_fmt->quantization = mbus_fmt->quantization; + pix_fmt->xfer_func = mbus_fmt->xfer_func; } static inline void v4l2_fill_mbus_format(struct v4l2_mbus_framefmt *mbus_fmt, @@ -108,6 +109,7 @@ static inline void v4l2_fill_mbus_format(struct v4l2_mbus_framefmt *mbus_fmt, mbus_fmt->colorspace = pix_fmt->colorspace; mbus_fmt->ycbcr_enc = pix_fmt->ycbcr_enc; mbus_fmt->quantization = pix_fmt->quantization; + mbus_fmt->xfer_func = pix_fmt->xfer_func; mbus_fmt->code = code; } diff --git a/include/uapi/linux/v4l2-mediabus.h b/include/uapi/linux/v4l2-mediabus.h index 26db20647e6f..9cac6325cc7e 100644 --- a/include/uapi/linux/v4l2-mediabus.h +++ b/include/uapi/linux/v4l2-mediabus.h @@ -24,6 +24,7 @@ * @colorspace: colorspace of the data (from enum v4l2_colorspace) * @ycbcr_enc: YCbCr encoding of the data (from enum v4l2_ycbcr_encoding) * @quantization: quantization of the data (from enum v4l2_quantization) + * @xfer_func: transfer function of the data (from enum v4l2_xfer_func) */ struct v4l2_mbus_framefmt { __u32 width; @@ -33,7 +34,8 @@ struct v4l2_mbus_framefmt { __u32 
colorspace; __u16 ycbcr_enc; __u16 quantization; - __u32 reserved[6]; + __u16 xfer_func; + __u16 reserved[11]; }; #ifndef __KERNEL__ diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index 003a91292a4f..3d5fc72d53a7 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -240,6 +240,42 @@ enum v4l2_colorspace { ((is_sdtv) ? V4L2_COLORSPACE_SMPTE170M : \ ((is_hdtv) ? V4L2_COLORSPACE_REC709 : V4L2_COLORSPACE_SRGB)) +enum v4l2_xfer_func { + /* + * Mapping of V4L2_XFER_FUNC_DEFAULT to actual transfer functions + * for the various colorspaces: + * + * V4L2_COLORSPACE_SMPTE170M, V4L2_COLORSPACE_470_SYSTEM_M, + * V4L2_COLORSPACE_470_SYSTEM_BG, V4L2_COLORSPACE_REC709 and + * V4L2_COLORSPACE_BT2020: V4L2_XFER_FUNC_709 + * + * V4L2_COLORSPACE_SRGB, V4L2_COLORSPACE_JPEG: V4L2_XFER_FUNC_SRGB + * + * V4L2_COLORSPACE_ADOBERGB: V4L2_XFER_FUNC_ADOBERGB + * + * V4L2_COLORSPACE_SMPTE240M: V4L2_XFER_FUNC_SMPTE240M + * + * V4L2_COLORSPACE_RAW: V4L2_XFER_FUNC_NONE + */ + V4L2_XFER_FUNC_DEFAULT = 0, + V4L2_XFER_FUNC_709 = 1, + V4L2_XFER_FUNC_SRGB = 2, + V4L2_XFER_FUNC_ADOBERGB = 3, + V4L2_XFER_FUNC_SMPTE240M = 4, + V4L2_XFER_FUNC_NONE = 5, +}; + +/* + * Determine how XFER_FUNC_DEFAULT should map to a proper transfer function. + * This depends on the colorspace. + */ +#define V4L2_MAP_XFER_FUNC_DEFAULT(colsp) \ + ((colsp) == V4L2_XFER_FUNC_ADOBERGB ? V4L2_XFER_FUNC_ADOBERGB : \ + ((colsp) == V4L2_COLORSPACE_SMPTE240M ? V4L2_XFER_FUNC_SMPTE240M : \ + ((colsp) == V4L2_COLORSPACE_RAW ? V4L2_XFER_FUNC_NONE : \ + ((colsp) == V4L2_COLORSPACE_SRGB || (colsp) == V4L2_COLORSPACE_JPEG ? \ + V4L2_XFER_FUNC_SRGB : V4L2_XFER_FUNC_709)))) + enum v4l2_ycbcr_encoding { /* * Mapping of V4L2_YCBCR_ENC_DEFAULT to actual encodings for the @@ -409,6 +445,7 @@ struct v4l2_pix_format { __u32 flags; /* format flags (V4L2_PIX_FMT_FLAG_*) */ __u32 ycbcr_enc; /* enum v4l2_ycbcr_encoding */ __u32 quantization; /* enum v4l2_quantization */ + __u32 xfer_func; /* enum v4l2_xfer_func */ }; /* Pixel format FOURCC depth Description */ @@ -1907,6 +1944,7 @@ struct v4l2_plane_pix_format { * @flags: format flags (V4L2_PIX_FMT_FLAG_*) * @ycbcr_enc: enum v4l2_ycbcr_encoding, Y'CbCr encoding * @quantization: enum v4l2_quantization, colorspace quantization + * @xfer_func: enum v4l2_xfer_func, colorspace transfer function */ struct v4l2_pix_format_mplane { __u32 width; @@ -1920,7 +1958,8 @@ struct v4l2_pix_format_mplane { __u8 flags; __u8 ycbcr_enc; __u8 quantization; - __u8 reserved[8]; + __u8 xfer_func; + __u8 reserved[7]; } __attribute__ ((packed)); /** -- cgit v1.2.3 From f481b069e674378758c73761827e83ab05c46b52 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Sun, 17 May 2015 17:30:37 +0200 Subject: KVM: implement multiple address spaces MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Only two ioctls have to be modified; the address space id is placed in the higher 16 bits of their slot id argument. As of this patch, no architecture defines more than one address space; x86 will be the first. 
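For illustration only (no architecture defines a second address space yet, and the helper below is hypothetical): with KVM_CAP_MULTI_ADDRESS_SPACE available, userspace encodes the address space id in bits 16-31 of the slot argument of KVM_SET_USER_MEMORY_REGION:

  #include <linux/kvm.h>
  #include <sys/ioctl.h>

  /* Sketch: register host memory as a memslot in address space 'as_id'. */
  static int set_memslot(int vm_fd, __u16 as_id, __u16 slot,
                         __u64 gpa, __u64 size, void *host_mem)
  {
          struct kvm_userspace_memory_region region = {
                  .slot            = ((__u32)as_id << 16) | slot,
                  .guest_phys_addr = gpa,
                  .memory_size     = size,
                  .userspace_addr  = (unsigned long)host_mem,
          };

          return ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, &region);
  }

as_id must be below the value KVM_CHECK_EXTENSION reports for KVM_CAP_MULTI_ADDRESS_SPACE; address space 0 keeps today's behaviour.
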
Reviewed-by: Radim Krčmář Signed-off-by: Paolo Bonzini --- Documentation/virtual/kvm/api.txt | 12 ++++++ arch/powerpc/include/asm/kvm_book3s_64.h | 2 +- include/linux/kvm_host.h | 26 ++++++++++-- include/uapi/linux/kvm.h | 1 + virt/kvm/kvm_main.c | 70 ++++++++++++++++++++------------ 5 files changed, 79 insertions(+), 32 deletions(-) (limited to 'include/uapi/linux') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 2ddefd58b1aa..461956a0ee8e 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -254,6 +254,11 @@ since the last call to this ioctl. Bit 0 is the first page in the memory slot. Ensure the entire structure is cleared to avoid padding issues. +If KVM_CAP_MULTI_ADDRESS_SPACE is available, bits 16-31 specifies +the address space for which you want to return the dirty bitmap. +They must be less than the value that KVM_CHECK_EXTENSION returns for +the KVM_CAP_MULTI_ADDRESS_SPACE capability. + 4.9 KVM_SET_MEMORY_ALIAS @@ -924,6 +929,13 @@ slot. When changing an existing slot, it may be moved in the guest physical memory space, or its flags may be modified. It may not be resized. Slots may not overlap in guest physical address space. +If KVM_CAP_MULTI_ADDRESS_SPACE is available, bits 16-31 of "slot" +specifies the address space which is being modified. They must be +less than the value that KVM_CHECK_EXTENSION returns for the +KVM_CAP_MULTI_ADDRESS_SPACE capability. Slots in separate address spaces +are unrelated; the restriction on overlapping slots only applies within +each address space. + Memory for the region is taken starting at the address denoted by the field userspace_addr, which must point at user addressable memory for the entire memory slot size. Any object may back this memory, including diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h index 3536d12eb798..2aa79c864e91 100644 --- a/arch/powerpc/include/asm/kvm_book3s_64.h +++ b/arch/powerpc/include/asm/kvm_book3s_64.h @@ -430,7 +430,7 @@ static inline void note_hpte_modification(struct kvm *kvm, */ static inline struct kvm_memslots *kvm_memslots_raw(struct kvm *kvm) { - return rcu_dereference_raw_notrace(kvm->memslots); + return rcu_dereference_raw_notrace(kvm->memslots[0]); } extern void kvmppc_mmu_debugfs_init(struct kvm *kvm); diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index ba1ea43998e4..9564fd78c547 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -44,6 +44,10 @@ /* Two fragments for cross MMIO pages. 
*/ #define KVM_MAX_MMIO_FRAGMENTS 2 +#ifndef KVM_ADDRESS_SPACE_NUM +#define KVM_ADDRESS_SPACE_NUM 1 +#endif + /* * For the normal pfn, the highest 12 bits should be zero, * so we can mask bit 62 ~ bit 52 to indicate the error pfn, @@ -331,6 +335,13 @@ struct kvm_kernel_irq_routing_entry { #define KVM_MEM_SLOTS_NUM (KVM_USER_MEM_SLOTS + KVM_PRIVATE_MEM_SLOTS) #endif +#ifndef __KVM_VCPU_MULTIPLE_ADDRESS_SPACE +static inline int kvm_arch_vcpu_memslots_id(struct kvm_vcpu *vcpu) +{ + return 0; +} +#endif + /* * Note: * memslots are not sorted by id anymore, please use id_to_memslot() @@ -349,7 +360,7 @@ struct kvm { spinlock_t mmu_lock; struct mutex slots_lock; struct mm_struct *mm; /* userspace tied to this vm */ - struct kvm_memslots *memslots; + struct kvm_memslots *memslots[KVM_ADDRESS_SPACE_NUM]; struct srcu_struct srcu; struct srcu_struct irq_srcu; #ifdef CONFIG_KVM_APIC_ARCHITECTURE @@ -464,16 +475,23 @@ void kvm_exit(void); void kvm_get_kvm(struct kvm *kvm); void kvm_put_kvm(struct kvm *kvm); -static inline struct kvm_memslots *kvm_memslots(struct kvm *kvm) +static inline struct kvm_memslots *__kvm_memslots(struct kvm *kvm, int as_id) { - return rcu_dereference_check(kvm->memslots, + return rcu_dereference_check(kvm->memslots[as_id], srcu_read_lock_held(&kvm->srcu) || lockdep_is_held(&kvm->slots_lock)); } +static inline struct kvm_memslots *kvm_memslots(struct kvm *kvm) +{ + return __kvm_memslots(kvm, 0); +} + static inline struct kvm_memslots *kvm_vcpu_memslots(struct kvm_vcpu *vcpu) { - return kvm_memslots(vcpu->kvm); + int as_id = kvm_arch_vcpu_memslots_id(vcpu); + + return __kvm_memslots(vcpu->kvm, as_id); } static inline struct kvm_memory_slot * diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index eace8babd227..5ff1038437e3 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -816,6 +816,7 @@ struct kvm_ppc_smmu_info { #define KVM_CAP_PPC_HWRNG 115 #define KVM_CAP_DISABLE_QUIRKS 116 #define KVM_CAP_X86_SMM 117 +#define KVM_CAP_MULTI_ADDRESS_SPACE 118 #ifdef KVM_CAP_IRQ_ROUTING diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 3a121cedcc77..848af90b8091 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -518,9 +518,11 @@ static struct kvm *kvm_create_vm(unsigned long type) BUILD_BUG_ON(KVM_MEM_SLOTS_NUM > SHRT_MAX); r = -ENOMEM; - kvm->memslots = kvm_alloc_memslots(); - if (!kvm->memslots) - goto out_err_no_srcu; + for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) { + kvm->memslots[i] = kvm_alloc_memslots(); + if (!kvm->memslots[i]) + goto out_err_no_srcu; + } if (init_srcu_struct(&kvm->srcu)) goto out_err_no_srcu; @@ -562,7 +564,8 @@ out_err_no_srcu: out_err_no_disable: for (i = 0; i < KVM_NR_BUSES; i++) kfree(kvm->buses[i]); - kvm_free_memslots(kvm, kvm->memslots); + for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) + kvm_free_memslots(kvm, kvm->memslots[i]); kvm_arch_free_vm(kvm); return ERR_PTR(r); } @@ -612,7 +615,8 @@ static void kvm_destroy_vm(struct kvm *kvm) #endif kvm_arch_destroy_vm(kvm); kvm_destroy_devices(kvm); - kvm_free_memslots(kvm, kvm->memslots); + for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) + kvm_free_memslots(kvm, kvm->memslots[i]); cleanup_srcu_struct(&kvm->irq_srcu); cleanup_srcu_struct(&kvm->srcu); kvm_arch_free_vm(kvm); @@ -729,9 +733,9 @@ static int check_memory_region_flags(const struct kvm_userspace_memory_region *m } static struct kvm_memslots *install_new_memslots(struct kvm *kvm, - struct kvm_memslots *slots) + int as_id, struct kvm_memslots *slots) { - struct kvm_memslots *old_memslots = 
kvm_memslots(kvm); + struct kvm_memslots *old_memslots = __kvm_memslots(kvm, as_id); /* * Set the low bit in the generation, which disables SPTE caching @@ -740,7 +744,7 @@ static struct kvm_memslots *install_new_memslots(struct kvm *kvm, WARN_ON(old_memslots->generation & 1); slots->generation = old_memslots->generation + 1; - rcu_assign_pointer(kvm->memslots, slots); + rcu_assign_pointer(kvm->memslots[as_id], slots); synchronize_srcu_expedited(&kvm->srcu); /* @@ -772,6 +776,7 @@ int __kvm_set_memory_region(struct kvm *kvm, struct kvm_memory_slot *slot; struct kvm_memory_slot old, new; struct kvm_memslots *slots = NULL, *old_memslots; + int as_id, id; enum kvm_mr_change change; r = check_memory_region_flags(mem); @@ -779,24 +784,27 @@ int __kvm_set_memory_region(struct kvm *kvm, goto out; r = -EINVAL; + as_id = mem->slot >> 16; + id = (u16)mem->slot; + /* General sanity checks */ if (mem->memory_size & (PAGE_SIZE - 1)) goto out; if (mem->guest_phys_addr & (PAGE_SIZE - 1)) goto out; /* We can read the guest memory with __xxx_user() later on. */ - if ((mem->slot < KVM_USER_MEM_SLOTS) && + if ((id < KVM_USER_MEM_SLOTS) && ((mem->userspace_addr & (PAGE_SIZE - 1)) || !access_ok(VERIFY_WRITE, (void __user *)(unsigned long)mem->userspace_addr, mem->memory_size))) goto out; - if (mem->slot >= KVM_MEM_SLOTS_NUM) + if (as_id >= KVM_ADDRESS_SPACE_NUM || id >= KVM_MEM_SLOTS_NUM) goto out; if (mem->guest_phys_addr + mem->memory_size < mem->guest_phys_addr) goto out; - slot = id_to_memslot(kvm_memslots(kvm), mem->slot); + slot = id_to_memslot(__kvm_memslots(kvm, as_id), id); base_gfn = mem->guest_phys_addr >> PAGE_SHIFT; npages = mem->memory_size >> PAGE_SHIFT; @@ -805,7 +813,7 @@ int __kvm_set_memory_region(struct kvm *kvm, new = old = *slot; - new.id = mem->slot; + new.id = id; new.base_gfn = base_gfn; new.npages = npages; new.flags = mem->flags; @@ -840,9 +848,9 @@ int __kvm_set_memory_region(struct kvm *kvm, if ((change == KVM_MR_CREATE) || (change == KVM_MR_MOVE)) { /* Check for overlaps */ r = -EEXIST; - kvm_for_each_memslot(slot, kvm_memslots(kvm)) { + kvm_for_each_memslot(slot, __kvm_memslots(kvm, as_id)) { if ((slot->id >= KVM_USER_MEM_SLOTS) || - (slot->id == mem->slot)) + (slot->id == id)) continue; if (!((base_gfn + npages <= slot->base_gfn) || (base_gfn >= slot->base_gfn + slot->npages))) @@ -871,13 +879,13 @@ int __kvm_set_memory_region(struct kvm *kvm, slots = kvm_kvzalloc(sizeof(struct kvm_memslots)); if (!slots) goto out_free; - memcpy(slots, kvm_memslots(kvm), sizeof(struct kvm_memslots)); + memcpy(slots, __kvm_memslots(kvm, as_id), sizeof(struct kvm_memslots)); if ((change == KVM_MR_DELETE) || (change == KVM_MR_MOVE)) { - slot = id_to_memslot(slots, mem->slot); + slot = id_to_memslot(slots, id); slot->flags |= KVM_MEMSLOT_INVALID; - old_memslots = install_new_memslots(kvm, slots); + old_memslots = install_new_memslots(kvm, as_id, slots); /* slot was deleted or moved, clear iommu mapping */ kvm_iommu_unmap_pages(kvm, &old); @@ -909,7 +917,7 @@ int __kvm_set_memory_region(struct kvm *kvm, } update_memslots(slots, &new); - old_memslots = install_new_memslots(kvm, slots); + old_memslots = install_new_memslots(kvm, as_id, slots); kvm_arch_commit_memory_region(kvm, mem, &old, &new, change); @@ -956,7 +964,7 @@ EXPORT_SYMBOL_GPL(kvm_set_memory_region); static int kvm_vm_ioctl_set_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem) { - if (mem->slot >= KVM_USER_MEM_SLOTS) + if ((u16)mem->slot >= KVM_USER_MEM_SLOTS) return -EINVAL; return kvm_set_memory_region(kvm, mem); 
@@ -967,16 +975,18 @@ int kvm_get_dirty_log, { struct kvm_memslots *slots; struct kvm_memory_slot *memslot; - int r, i; + int r, i, as_id, id; unsigned long n; unsigned long any = 0; r = -EINVAL; - if (log->slot >= KVM_USER_MEM_SLOTS) + as_id = log->slot >> 16; + id = (u16)log->slot; + if (as_id >= KVM_ADDRESS_SPACE_NUM || id >= KVM_USER_MEM_SLOTS) goto out; - slots = kvm_memslots(kvm); - memslot = id_to_memslot(slots, log->slot); + slots = __kvm_memslots(kvm, as_id); + memslot = id_to_memslot(slots, id); r = -ENOENT; if (!memslot->dirty_bitmap) goto out; @@ -1027,17 +1037,19 @@ int kvm_get_dirty_log_protect(struct kvm *kvm, { struct kvm_memslots *slots; struct kvm_memory_slot *memslot; - int r, i; + int r, i, as_id, id; unsigned long n; unsigned long *dirty_bitmap; unsigned long *dirty_bitmap_buffer; r = -EINVAL; - if (log->slot >= KVM_USER_MEM_SLOTS) + as_id = log->slot >> 16; + id = (u16)log->slot; + if (as_id >= KVM_ADDRESS_SPACE_NUM || id >= KVM_USER_MEM_SLOTS) goto out; - slots = kvm_memslots(kvm); - memslot = id_to_memslot(slots, log->slot); + slots = __kvm_memslots(kvm, as_id); + memslot = id_to_memslot(slots, id); dirty_bitmap = memslot->dirty_bitmap; r = -ENOENT; @@ -2619,6 +2631,10 @@ static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg) #ifdef CONFIG_HAVE_KVM_IRQ_ROUTING case KVM_CAP_IRQ_ROUTING: return KVM_MAX_IRQ_ROUTES; +#endif +#if KVM_ADDRESS_SPACE_NUM > 1 + case KVM_CAP_MULTI_ADDRESS_SPACE: + return KVM_ADDRESS_SPACE_NUM; #endif default: break; -- cgit v1.2.3 From 4cc06521ee1f153e0d292413a5bff7bbbdee92d0 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Fri, 5 Jun 2015 10:30:08 -0600 Subject: NVMe: add sysfs and ioctl controller reset We need the ability to perform an nvme controller reset as discussed on the mailing list thread: http://lists.infradead.org/pipermail/linux-nvme/2015-March/001585.html This adds a sysfs entry that, when written to, will perform an NVMe controller reset if the controller was successfully initialized in the first place. This also adds locking around resetting the device in the async probe method so the driver can't schedule two resets. Signed-off-by: Keith Busch Cc: Brandon Schultz Cc: David Sariel Updated by Jens to: 1) Merge this with the ioctl reset patch from David Sariel. The ioctl path now shares the reset code from the sysfs path. 2) Don't flush work if we fail issuing the reset.
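As a rough usage sketch (not taken from the patch itself; the /dev/nvme0 character-device path and the sysfs location are assumptions), the reset can be requested either by writing to the new attribute, e.g. something like "echo 1 > /sys/class/nvme/nvme0/reset_controller", or through the new ioctl:

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/nvme.h>		/* NVME_IOCTL_RESET, added by this patch */

int main(void)
{
	int fd = open("/dev/nvme0", O_RDWR);	/* assumed controller char device */

	if (fd < 0) {
		perror("open");
		return 1;
	}
	/* NVME_IOCTL_RESET is _IO('N', 0x44); it takes no argument. */
	if (ioctl(fd, NVME_IOCTL_RESET) < 0)
		perror("NVME_IOCTL_RESET");
	close(fd);
	return 0;
}

On failure errno mirrors nvme_reset(): ENODEV if the admin queue is already gone, EBUSY if a reset is already pending.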
Signed-off-by: Jens Axboe --- drivers/block/nvme-core.c | 53 +++++++++++++++++++++++++++++++++++++++++++++++ include/uapi/linux/nvme.h | 1 + 2 files changed, 54 insertions(+) (limited to 'include/uapi/linux') diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index 513908ff46c4..9682e29b4171 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -80,6 +80,7 @@ static wait_queue_head_t nvme_kthread_wait; static struct class *nvme_class; static void nvme_reset_failed_dev(struct work_struct *ws); +static int nvme_reset(struct nvme_dev *dev); static int nvme_process_cq(struct nvme_queue *nvmeq); struct async_cmd_info { @@ -2689,6 +2690,9 @@ static long nvme_dev_ioctl(struct file *f, unsigned int cmd, unsigned long arg) return -ENOTTY; ns = list_first_entry(&dev->namespaces, struct nvme_ns, list); return nvme_user_cmd(dev, ns, (void __user *)arg); + case NVME_IOCTL_RESET: + dev_warn(dev->dev, "resetting controller\n"); + return nvme_reset(dev); default: return -ENOTTY; } @@ -2839,6 +2843,44 @@ static void nvme_reset_workfn(struct work_struct *work) dev->reset_workfn(work); } +static int nvme_reset(struct nvme_dev *dev) +{ + int ret = -EBUSY; + + if (!dev->admin_q || blk_queue_dying(dev->admin_q)) + return -ENODEV; + + spin_lock(&dev_list_lock); + if (!work_pending(&dev->reset_work)) { + dev->reset_workfn = nvme_reset_failed_dev; + queue_work(nvme_workq, &dev->reset_work); + ret = 0; + } + spin_unlock(&dev_list_lock); + + if (!ret) { + flush_work(&dev->reset_work); + return 0; + } + + return ret; +} + +static ssize_t nvme_sysfs_reset(struct device *dev, + struct device_attribute *attr, const char *buf, + size_t count) +{ + struct nvme_dev *ndev = dev_get_drvdata(dev); + int ret; + + ret = nvme_reset(ndev); + if (ret < 0) + return ret; + + return count; +} +static DEVICE_ATTR(reset_controller, S_IWUSR, NULL, nvme_sysfs_reset); + static void nvme_async_probe(struct work_struct *work); static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) { @@ -2883,12 +2925,20 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) goto release_pools; } get_device(dev->device); + dev_set_drvdata(dev->device, dev); + + result = device_create_file(dev->device, &dev_attr_reset_controller); + if (result) + goto put_dev; INIT_LIST_HEAD(&dev->node); INIT_WORK(&dev->probe_work, nvme_async_probe); schedule_work(&dev->probe_work); return 0; + put_dev: + device_destroy(nvme_class, MKDEV(nvme_char_major, dev->instance)); + put_device(dev->device); release_pools: nvme_release_prp_pools(dev); release: @@ -2919,10 +2969,12 @@ static void nvme_async_probe(struct work_struct *work) nvme_set_irq_hints(dev); return; reset: + spin_lock(&dev_list_lock); if (!work_busy(&dev->reset_work)) { dev->reset_workfn = nvme_reset_failed_dev; queue_work(nvme_workq, &dev->reset_work); } + spin_unlock(&dev_list_lock); } static void nvme_reset_notify(struct pci_dev *pdev, bool prepare) @@ -2952,6 +3004,7 @@ static void nvme_remove(struct pci_dev *pdev) pci_set_drvdata(pdev, NULL); flush_work(&dev->probe_work); flush_work(&dev->reset_work); + device_remove_file(dev->device, &dev_attr_reset_controller); nvme_dev_shutdown(dev); nvme_dev_remove(dev); nvme_dev_remove_admin(dev); diff --git a/include/uapi/linux/nvme.h b/include/uapi/linux/nvme.h index aef9a81b2d75..b660dc2fadfb 100644 --- a/include/uapi/linux/nvme.h +++ b/include/uapi/linux/nvme.h @@ -579,5 +579,6 @@ struct nvme_passthru_cmd { #define NVME_IOCTL_ADMIN_CMD _IOWR('N', 0x41, struct nvme_admin_cmd) #define 
NVME_IOCTL_SUBMIT_IO _IOW('N', 0x42, struct nvme_user_io) #define NVME_IOCTL_IO_CMD _IOWR('N', 0x43, struct nvme_passthru_cmd) +#define NVME_IOCTL_RESET _IO('N', 0x44) #endif /* _UAPI_LINUX_NVME_H */ -- cgit v1.2.3 From a5768aa887fb636f0cc4c83a2f1242506aaf50f6 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Mon, 1 Jun 2015 14:28:14 -0600 Subject: NVMe: Automatic namespace rescan Namespaces may be dynamically allocated and deleted or attached and detached. This has the driver rescan the device for namespace changes after each device reset or namespace change asynchronous event. There could potentially be many detached namespaces that we don't want polluting /dev/ with unusable block handles, so this will delete disks if the namespace is not active as indicated by the response from identify namespace. This also skips adding the disk if no capacity is provisioned to the namespace in the first place. Signed-off-by: Keith Busch Signed-off-by: Jens Axboe --- drivers/block/nvme-core.c | 159 ++++++++++++++++++++++++++++++++++++---------- include/linux/nvme.h | 1 + include/uapi/linux/nvme.h | 4 ++ 3 files changed, 132 insertions(+), 32 deletions(-) (limited to 'include/uapi/linux') diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index cae7cac6cc43..2072ae81c13a 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -300,9 +301,16 @@ static void async_req_completion(struct nvme_queue *nvmeq, void *ctx, if (status == NVME_SC_SUCCESS || status == NVME_SC_ABORT_REQ) ++nvmeq->dev->event_limit; - if (status == NVME_SC_SUCCESS) - dev_warn(nvmeq->q_dmadev, - "async event result %08x\n", result); + if (status != NVME_SC_SUCCESS) + return; + + switch (result & 0xff07) { + case NVME_AER_NOTICE_NS_CHANGED: + dev_info(nvmeq->q_dmadev, "rescanning\n"); + schedule_work(&nvmeq->dev->scan_work); + default: + dev_warn(nvmeq->q_dmadev, "async event result %08x\n", result); + } } static void abort_completion(struct nvme_queue *nvmeq, void *ctx, @@ -1923,8 +1931,13 @@ static int nvme_revalidate_disk(struct gendisk *disk) unsigned short bs; if (nvme_identify_ns(dev, ns->ns_id, &id)) { - dev_warn(dev->dev, "%s: Identify failure\n", __func__); - return 0; + dev_warn(dev->dev, "%s: Identify failure nvme%dn%d\n", __func__, + dev->instance, ns->ns_id); + return -ENODEV; + } + if (id->ncap == 0) { + kfree(id); + return -ENODEV; } old_ms = ns->ms; @@ -1958,7 +1971,7 @@ static int nvme_revalidate_disk(struct gendisk *disk) !ns->ext) nvme_init_integrity(ns); - if (id->ncap == 0 || (ns->ms && !blk_get_integrity(disk))) + if (ns->ms && !blk_get_integrity(disk)) set_capacity(disk, 0); else set_capacity(disk, le64_to_cpup(&id->nsze) << (ns->lba_shift - 9)); @@ -2073,11 +2086,16 @@ static void nvme_alloc_ns(struct nvme_dev *dev, unsigned nsid) * requires it. 
*/ set_capacity(disk, 0); - nvme_revalidate_disk(ns->disk); + if (nvme_revalidate_disk(ns->disk)) + goto out_free_disk; + add_disk(ns->disk); if (ns->ms) revalidate_disk(ns->disk); return; + out_free_disk: + kfree(disk); + list_del(&ns->list); out_free_queue: blk_cleanup_queue(ns->queue); out_free_ns: @@ -2194,6 +2212,99 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) return result; } +static void nvme_free_namespace(struct nvme_ns *ns) +{ + list_del(&ns->list); + + spin_lock(&dev_list_lock); + ns->disk->private_data = NULL; + spin_unlock(&dev_list_lock); + + put_disk(ns->disk); + kfree(ns); +} + +static int ns_cmp(void *priv, struct list_head *a, struct list_head *b) +{ + struct nvme_ns *nsa = container_of(a, struct nvme_ns, list); + struct nvme_ns *nsb = container_of(b, struct nvme_ns, list); + + return nsa->ns_id - nsb->ns_id; +} + +static struct nvme_ns *nvme_find_ns(struct nvme_dev *dev, unsigned nsid) +{ + struct nvme_ns *ns; + + list_for_each_entry(ns, &dev->namespaces, list) { + if (ns->ns_id == nsid) + return ns; + if (ns->ns_id > nsid) + break; + } + return NULL; +} + +static inline bool nvme_io_incapable(struct nvme_dev *dev) +{ + return (!dev->bar || readl(&dev->bar->csts) & NVME_CSTS_CFS || + dev->online_queues < 2); +} + +static void nvme_ns_remove(struct nvme_ns *ns) +{ + bool kill = nvme_io_incapable(ns->dev) && !blk_queue_dying(ns->queue); + + if (kill) + blk_set_queue_dying(ns->queue); + if (ns->disk->flags & GENHD_FL_UP) { + if (blk_get_integrity(ns->disk)) + blk_integrity_unregister(ns->disk); + del_gendisk(ns->disk); + } + if (kill || !blk_queue_dying(ns->queue)) { + blk_mq_abort_requeue_list(ns->queue); + blk_cleanup_queue(ns->queue); + } +} + +static void nvme_scan_namespaces(struct nvme_dev *dev, unsigned nn) +{ + struct nvme_ns *ns, *next; + unsigned i; + + for (i = 1; i <= nn; i++) { + ns = nvme_find_ns(dev, i); + if (ns) { + if (revalidate_disk(ns->disk)) { + nvme_ns_remove(ns); + nvme_free_namespace(ns); + } + } else + nvme_alloc_ns(dev, i); + } + list_for_each_entry_safe(ns, next, &dev->namespaces, list) { + if (ns->ns_id > nn) { + nvme_ns_remove(ns); + nvme_free_namespace(ns); + } + } + list_sort(NULL, &dev->namespaces, ns_cmp); +} + +static void nvme_dev_scan(struct work_struct *work) +{ + struct nvme_dev *dev = container_of(work, struct nvme_dev, scan_work); + struct nvme_id_ctrl *ctrl; + + if (!dev->tagset.tags) + return; + if (nvme_identify_ctrl(dev, &ctrl)) + return; + nvme_scan_namespaces(dev, le32_to_cpup(&ctrl->nn)); + kfree(ctrl); +} + /* * Return: error value if an error occurred setting up the queues or calling * Identify Device. 
0 if these succeeded, even if adding some of the @@ -2204,7 +2315,7 @@ static int nvme_dev_add(struct nvme_dev *dev) { struct pci_dev *pdev = to_pci_dev(dev->dev); int res; - unsigned nn, i; + unsigned nn; struct nvme_id_ctrl *ctrl; int shift = NVME_CAP_MPSMIN(readq(&dev->bar->cap)) + 12; @@ -2250,9 +2361,7 @@ static int nvme_dev_add(struct nvme_dev *dev) if (blk_mq_alloc_tag_set(&dev->tagset)) return 0; - for (i = 1; i <= nn; i++) - nvme_alloc_ns(dev, i); - + schedule_work(&dev->scan_work); return 0; } @@ -2552,17 +2661,8 @@ static void nvme_dev_remove(struct nvme_dev *dev) { struct nvme_ns *ns; - list_for_each_entry(ns, &dev->namespaces, list) { - if (ns->disk->flags & GENHD_FL_UP) { - if (blk_get_integrity(ns->disk)) - blk_integrity_unregister(ns->disk); - del_gendisk(ns->disk); - } - if (!blk_queue_dying(ns->queue)) { - blk_mq_abort_requeue_list(ns->queue); - blk_cleanup_queue(ns->queue); - } - } + list_for_each_entry(ns, &dev->namespaces, list) + nvme_ns_remove(ns); } static int nvme_setup_prp_pools(struct nvme_dev *dev) @@ -2621,16 +2721,8 @@ static void nvme_free_namespaces(struct nvme_dev *dev) { struct nvme_ns *ns, *next; - list_for_each_entry_safe(ns, next, &dev->namespaces, list) { - list_del(&ns->list); - - spin_lock(&dev_list_lock); - ns->disk->private_data = NULL; - spin_unlock(&dev_list_lock); - - put_disk(ns->disk); - kfree(ns); - } + list_for_each_entry_safe(ns, next, &dev->namespaces, list) + nvme_free_namespace(ns); } static void nvme_free_dev(struct kref *kref) @@ -2814,6 +2906,7 @@ static int nvme_dev_resume(struct nvme_dev *dev) spin_unlock(&dev_list_lock); } else { nvme_unfreeze_queues(dev); + schedule_work(&dev->scan_work); nvme_set_irq_hints(dev); } return 0; @@ -2935,6 +3028,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) goto put_dev; INIT_LIST_HEAD(&dev->node); + INIT_WORK(&dev->scan_work, nvme_dev_scan); INIT_WORK(&dev->probe_work, nvme_async_probe); schedule_work(&dev->probe_work); return 0; @@ -3007,6 +3101,7 @@ static void nvme_remove(struct pci_dev *pdev) pci_set_drvdata(pdev, NULL); flush_work(&dev->probe_work); flush_work(&dev->reset_work); + flush_work(&dev->scan_work); device_remove_file(dev->device, &dev_attr_reset_controller); nvme_dev_shutdown(dev); nvme_dev_remove(dev); diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 986bf8ad8e93..c0d94ed8ce9a 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -92,6 +92,7 @@ struct nvme_dev { work_func_t reset_workfn; struct work_struct reset_work; struct work_struct probe_work; + struct work_struct scan_work; char name[12]; char serial[20]; char model[40]; diff --git a/include/uapi/linux/nvme.h b/include/uapi/linux/nvme.h index b660dc2fadfb..732b32e92b02 100644 --- a/include/uapi/linux/nvme.h +++ b/include/uapi/linux/nvme.h @@ -179,6 +179,10 @@ enum { NVME_SMART_CRIT_VOLATILE_MEMORY = 1 << 4, }; +enum { + NVME_AER_NOTICE_NS_CHANGED = 0x0002, +}; + struct nvme_lba_range_type { __u8 type; __u8 attributes; -- cgit v1.2.3 From 90c337da1524863838658078ec34241f45d8394d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 6 Jun 2015 21:17:57 -0700 Subject: inet: add IP_BIND_ADDRESS_NO_PORT to overcome bind(0) limitations When an application needs to force a source IP on an active TCP socket it has to use bind(IP, port=x). As most applications do not want to deal with already used ports, x is often set to 0, meaning the kernel is in charge to find an available port. But kernel does not know yet if this socket is going to be a listener or be connected. 
It has very limited choices (no full knowledge of final 4-tuple for a connect()) With limited ephemeral port range (about 32K ports), it is very easy to fill the space. This patch adds a new SOL_IP socket option, asking kernel to ignore the 0 port provided by application in bind(IP, port=0) and only remember the given IP address. The port will be automatically chosen at connect() time, in a way that allows sharing a source port as long as the 4-tuples are unique. This new feature is available for both IPv4 and IPv6 (Thanks Neal) Tested: Wrote a test program and checked its behavior on IPv4 and IPv6. strace(1) shows sequences of bind(IP=127.0.0.2, port=0) followed by connect(). Also getsockname() show that the port is still 0 right after bind() but properly allocated after connect(). socket(PF_INET, SOCK_STREAM, IPPROTO_IP) = 5 setsockopt(5, SOL_IP, IP_BIND_ADDRESS_NO_PORT, [1], 4) = 0 bind(5, {sa_family=AF_INET, sin_port=htons(0), sin_addr=inet_addr("127.0.0.2")}, 16) = 0 getsockname(5, {sa_family=AF_INET, sin_port=htons(0), sin_addr=inet_addr("127.0.0.2")}, [16]) = 0 connect(5, {sa_family=AF_INET, sin_port=htons(53174), sin_addr=inet_addr("127.0.0.3")}, 16) = 0 getsockname(5, {sa_family=AF_INET, sin_port=htons(38050), sin_addr=inet_addr("127.0.0.2")}, [16]) = 0 IPv6 test : socket(PF_INET6, SOCK_STREAM, IPPROTO_IP) = 7 setsockopt(7, SOL_IP, IP_BIND_ADDRESS_NO_PORT, [1], 4) = 0 bind(7, {sa_family=AF_INET6, sin6_port=htons(0), inet_pton(AF_INET6, "::1", &sin6_addr), sin6_flowinfo=0, sin6_scope_id=0}, 28) = 0 getsockname(7, {sa_family=AF_INET6, sin6_port=htons(0), inet_pton(AF_INET6, "::1", &sin6_addr), sin6_flowinfo=0, sin6_scope_id=0}, [28]) = 0 connect(7, {sa_family=AF_INET6, sin6_port=htons(57300), inet_pton(AF_INET6, "::1", &sin6_addr), sin6_flowinfo=0, sin6_scope_id=0}, 28) = 0 getsockname(7, {sa_family=AF_INET6, sin6_port=htons(60964), inet_pton(AF_INET6, "::1", &sin6_addr), sin6_flowinfo=0, sin6_scope_id=0}, [28]) = 0 I was able to bind()/connect() a million concurrent IPv4 sockets, instead of ~32000 before patch. lpaa23:~# ulimit -n 1000010 lpaa23:~# ./bind --connect --num-flows=1000000 & 1000000 sockets lpaa23:~# grep TCP /proc/net/sockstat TCP: inuse 2000063 orphan 0 tw 47 alloc 2000157 mem 66 Check that a given source port is indeed used by many different connections : lpaa23:~# ss -t src :40000 | head -10 State Recv-Q Send-Q Local Address:Port Peer Address:Port ESTAB 0 0 127.0.0.2:40000 127.0.202.33:44983 ESTAB 0 0 127.0.0.2:40000 127.2.27.240:44983 ESTAB 0 0 127.0.0.2:40000 127.2.98.5:44983 ESTAB 0 0 127.0.0.2:40000 127.0.124.196:44983 ESTAB 0 0 127.0.0.2:40000 127.2.139.38:44983 ESTAB 0 0 127.0.0.2:40000 127.1.59.80:44983 ESTAB 0 0 127.0.0.2:40000 127.3.6.228:44983 ESTAB 0 0 127.0.0.2:40000 127.0.38.53:44983 ESTAB 0 0 127.0.0.2:40000 127.1.197.10:44983 Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- include/net/inet_sock.h | 1 + include/uapi/linux/in.h | 1 + net/ipv4/af_inet.c | 3 ++- net/ipv4/ip_sockglue.c | 7 +++++++ net/ipv6/af_inet6.c | 3 ++- 5 files changed, 13 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index b6c3737da4e9..47eb67b08abd 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -187,6 +187,7 @@ struct inet_sock { transparent:1, mc_all:1, nodefrag:1; + __u8 bind_address_no_port:1; __u8 rcv_tos; __u8 convert_csum; int uc_index; diff --git a/include/uapi/linux/in.h b/include/uapi/linux/in.h index 641338bef651..83d6236a2f08 100644 --- a/include/uapi/linux/in.h +++ b/include/uapi/linux/in.h @@ -112,6 +112,7 @@ struct in_addr { #define IP_MINTTL 21 #define IP_NODEFRAG 22 #define IP_CHECKSUM 23 +#define IP_BIND_ADDRESS_NO_PORT 24 /* IP_MTU_DISCOVER values */ #define IP_PMTUDISC_DONT 0 /* Never send DF frames */ diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 6ad0f7a711c9..cc858ef44451 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -488,7 +488,8 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) inet->inet_saddr = 0; /* Use device */ /* Make sure we are allowed to bind here. */ - if (sk->sk_prot->get_port(sk, snum)) { + if ((snum || !inet->bind_address_no_port) && + sk->sk_prot->get_port(sk, snum)) { inet->inet_saddr = inet->inet_rcv_saddr = 0; err = -EADDRINUSE; goto out_release_sock; diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 7cfb0893f263..04ae2992a5cd 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -582,6 +582,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, case IP_TRANSPARENT: case IP_MINTTL: case IP_NODEFRAG: + case IP_BIND_ADDRESS_NO_PORT: case IP_UNICAST_IF: case IP_MULTICAST_TTL: case IP_MULTICAST_ALL: @@ -732,6 +733,9 @@ static int do_ip_setsockopt(struct sock *sk, int level, } inet->nodefrag = val ? 1 : 0; break; + case IP_BIND_ADDRESS_NO_PORT: + inet->bind_address_no_port = val ? 1 : 0; + break; case IP_MTU_DISCOVER: if (val < IP_PMTUDISC_DONT || val > IP_PMTUDISC_OMIT) goto e_inval; @@ -1324,6 +1328,9 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname, case IP_NODEFRAG: val = inet->nodefrag; break; + case IP_BIND_ADDRESS_NO_PORT: + val = inet->bind_address_no_port; + break; case IP_MTU_DISCOVER: val = inet->pmtudisc; break; diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index f3866c0b6cfe..7de52b65173f 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -362,7 +362,8 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) np->saddr = addr->sin6_addr; /* Make sure we are allowed to bind here. */ - if (sk->sk_prot->get_port(sk, snum)) { + if ((snum || !inet->bind_address_no_port) && + sk->sk_prot->get_port(sk, snum)) { inet_reset_saddr(sk); err = -EADDRINUSE; goto out; -- cgit v1.2.3 From d691f9e8d4405c334aa10d556e73c8bf44cb0e01 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 4 Jun 2015 10:11:54 -0700 Subject: bpf: allow programs to write to certain skb fields allow programs read/write skb->mark, tc_index fields and ((struct qdisc_skb_cb *)cb)->data. mark and tc_index are generically useful in TC. cb[0]-cb[4] are primarily used to pass arguments from one program to another called via bpf_tail_call() which can be seen in sockex3_kern.c example. All fields of 'struct __sk_buff' are readable to socket and tc_cls_act progs. mark, tc_index are writeable from tc_cls_act only. 
cb[0]-cb[4] are writeable by both sockets and tc_cls_act. Add verifier tests and improve sample code. Signed-off-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/bpf.h | 3 +- include/uapi/linux/bpf.h | 2 + kernel/bpf/verifier.c | 37 +++++++++++++----- net/core/filter.c | 94 +++++++++++++++++++++++++++++++++++++++------ samples/bpf/sockex3_kern.c | 35 ++++++----------- samples/bpf/test_verifier.c | 84 +++++++++++++++++++++++++++++++++++++++- 6 files changed, 207 insertions(+), 48 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index ca854e5bb2f7..2235aee8096a 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -105,7 +105,8 @@ struct bpf_verifier_ops { */ bool (*is_valid_access)(int off, int size, enum bpf_access_type type); - u32 (*convert_ctx_access)(int dst_reg, int src_reg, int ctx_off, + u32 (*convert_ctx_access)(enum bpf_access_type type, int dst_reg, + int src_reg, int ctx_off, struct bpf_insn *insn); }; diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 42aa19abab86..602f05b7a275 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -248,6 +248,8 @@ struct __sk_buff { __u32 priority; __u32 ingress_ifindex; __u32 ifindex; + __u32 tc_index; + __u32 cb[5]; }; #endif /* _UAPI__LINUX_BPF_H__ */ diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index cfd9a40b9a5a..039d866fd36a 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1692,6 +1692,8 @@ static int do_check(struct verifier_env *env) } } else if (class == BPF_STX) { + enum bpf_reg_type dst_reg_type; + if (BPF_MODE(insn->code) == BPF_XADD) { err = check_xadd(env, insn); if (err) @@ -1700,11 +1702,6 @@ static int do_check(struct verifier_env *env) continue; } - if (BPF_MODE(insn->code) != BPF_MEM || - insn->imm != 0) { - verbose("BPF_STX uses reserved fields\n"); - return -EINVAL; - } /* check src1 operand */ err = check_reg_arg(regs, insn->src_reg, SRC_OP); if (err) @@ -1714,6 +1711,8 @@ static int do_check(struct verifier_env *env) if (err) return err; + dst_reg_type = regs[insn->dst_reg].type; + /* check that memory (dst_reg + off) is writeable */ err = check_mem_access(env, insn->dst_reg, insn->off, BPF_SIZE(insn->code), BPF_WRITE, @@ -1721,6 +1720,15 @@ static int do_check(struct verifier_env *env) if (err) return err; + if (insn->imm == 0) { + insn->imm = dst_reg_type; + } else if (dst_reg_type != insn->imm && + (dst_reg_type == PTR_TO_CTX || + insn->imm == PTR_TO_CTX)) { + verbose("same insn cannot be used with different pointers\n"); + return -EINVAL; + } + } else if (class == BPF_ST) { if (BPF_MODE(insn->code) != BPF_MEM || insn->src_reg != BPF_REG_0) { @@ -1839,12 +1847,18 @@ static int replace_map_fd_with_map_ptr(struct verifier_env *env) for (i = 0; i < insn_cnt; i++, insn++) { if (BPF_CLASS(insn->code) == BPF_LDX && - (BPF_MODE(insn->code) != BPF_MEM || - insn->imm != 0)) { + (BPF_MODE(insn->code) != BPF_MEM || insn->imm != 0)) { verbose("BPF_LDX uses reserved fields\n"); return -EINVAL; } + if (BPF_CLASS(insn->code) == BPF_STX && + ((BPF_MODE(insn->code) != BPF_MEM && + BPF_MODE(insn->code) != BPF_XADD) || insn->imm != 0)) { + verbose("BPF_STX uses reserved fields\n"); + return -EINVAL; + } + if (insn[0].code == (BPF_LD | BPF_IMM | BPF_DW)) { struct bpf_map *map; struct fd f; @@ -1967,12 +1981,17 @@ static int convert_ctx_accesses(struct verifier_env *env) struct bpf_prog *new_prog; u32 cnt; int i; + enum bpf_access_type type; if (!env->prog->aux->ops->convert_ctx_access) 
return 0; for (i = 0; i < insn_cnt; i++, insn++) { - if (insn->code != (BPF_LDX | BPF_MEM | BPF_W)) + if (insn->code == (BPF_LDX | BPF_MEM | BPF_W)) + type = BPF_READ; + else if (insn->code == (BPF_STX | BPF_MEM | BPF_W)) + type = BPF_WRITE; + else continue; if (insn->imm != PTR_TO_CTX) { @@ -1982,7 +2001,7 @@ static int convert_ctx_accesses(struct verifier_env *env) } cnt = env->prog->aux->ops-> - convert_ctx_access(insn->dst_reg, insn->src_reg, + convert_ctx_access(type, insn->dst_reg, insn->src_reg, insn->off, insn_buf); if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) { verbose("bpf verifier is misconfigured\n"); diff --git a/net/core/filter.c b/net/core/filter.c index 36a69e33d76b..d271c06bf01f 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -46,6 +46,7 @@ #include #include #include +#include /** * sk_filter - run a packet through a socket filter @@ -1463,13 +1464,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id) } } -static bool sk_filter_is_valid_access(int off, int size, - enum bpf_access_type type) +static bool __is_valid_access(int off, int size, enum bpf_access_type type) { - /* only read is allowed */ - if (type != BPF_READ) - return false; - /* check bounds */ if (off < 0 || off >= sizeof(struct __sk_buff)) return false; @@ -1485,8 +1481,42 @@ static bool sk_filter_is_valid_access(int off, int size, return true; } -static u32 sk_filter_convert_ctx_access(int dst_reg, int src_reg, int ctx_off, - struct bpf_insn *insn_buf) +static bool sk_filter_is_valid_access(int off, int size, + enum bpf_access_type type) +{ + if (type == BPF_WRITE) { + switch (off) { + case offsetof(struct __sk_buff, cb[0]) ... + offsetof(struct __sk_buff, cb[4]): + break; + default: + return false; + } + } + + return __is_valid_access(off, size, type); +} + +static bool tc_cls_act_is_valid_access(int off, int size, + enum bpf_access_type type) +{ + if (type == BPF_WRITE) { + switch (off) { + case offsetof(struct __sk_buff, mark): + case offsetof(struct __sk_buff, tc_index): + case offsetof(struct __sk_buff, cb[0]) ... + offsetof(struct __sk_buff, cb[4]): + break; + default: + return false; + } + } + return __is_valid_access(off, size, type); +} + +static u32 bpf_net_convert_ctx_access(enum bpf_access_type type, int dst_reg, + int src_reg, int ctx_off, + struct bpf_insn *insn_buf) { struct bpf_insn *insn = insn_buf; @@ -1538,7 +1568,15 @@ static u32 sk_filter_convert_ctx_access(int dst_reg, int src_reg, int ctx_off, break; case offsetof(struct __sk_buff, mark): - return convert_skb_access(SKF_AD_MARK, dst_reg, src_reg, insn); + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4); + + if (type == BPF_WRITE) + *insn++ = BPF_STX_MEM(BPF_W, dst_reg, src_reg, + offsetof(struct sk_buff, mark)); + else + *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg, + offsetof(struct sk_buff, mark)); + break; case offsetof(struct __sk_buff, pkt_type): return convert_skb_access(SKF_AD_PKTTYPE, dst_reg, src_reg, insn); @@ -1553,6 +1591,38 @@ static u32 sk_filter_convert_ctx_access(int dst_reg, int src_reg, int ctx_off, case offsetof(struct __sk_buff, vlan_tci): return convert_skb_access(SKF_AD_VLAN_TAG, dst_reg, src_reg, insn); + + case offsetof(struct __sk_buff, cb[0]) ... 
+ offsetof(struct __sk_buff, cb[4]): + BUILD_BUG_ON(FIELD_SIZEOF(struct qdisc_skb_cb, data) < 20); + + ctx_off -= offsetof(struct __sk_buff, cb[0]); + ctx_off += offsetof(struct sk_buff, cb); + ctx_off += offsetof(struct qdisc_skb_cb, data); + if (type == BPF_WRITE) + *insn++ = BPF_STX_MEM(BPF_W, dst_reg, src_reg, ctx_off); + else + *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg, ctx_off); + break; + + case offsetof(struct __sk_buff, tc_index): +#ifdef CONFIG_NET_SCHED + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, tc_index) != 2); + + if (type == BPF_WRITE) + *insn++ = BPF_STX_MEM(BPF_H, dst_reg, src_reg, + offsetof(struct sk_buff, tc_index)); + else + *insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg, + offsetof(struct sk_buff, tc_index)); + break; +#else + if (type == BPF_WRITE) + *insn++ = BPF_MOV64_REG(dst_reg, dst_reg); + else + *insn++ = BPF_MOV64_IMM(dst_reg, 0); + break; +#endif } return insn - insn_buf; @@ -1561,13 +1631,13 @@ static u32 sk_filter_convert_ctx_access(int dst_reg, int src_reg, int ctx_off, static const struct bpf_verifier_ops sk_filter_ops = { .get_func_proto = sk_filter_func_proto, .is_valid_access = sk_filter_is_valid_access, - .convert_ctx_access = sk_filter_convert_ctx_access, + .convert_ctx_access = bpf_net_convert_ctx_access, }; static const struct bpf_verifier_ops tc_cls_act_ops = { .get_func_proto = tc_cls_act_func_proto, - .is_valid_access = sk_filter_is_valid_access, - .convert_ctx_access = sk_filter_convert_ctx_access, + .is_valid_access = tc_cls_act_is_valid_access, + .convert_ctx_access = bpf_net_convert_ctx_access, }; static struct bpf_prog_type_list sk_filter_type __read_mostly = { diff --git a/samples/bpf/sockex3_kern.c b/samples/bpf/sockex3_kern.c index 2625b987944f..41ae2fd21b13 100644 --- a/samples/bpf/sockex3_kern.c +++ b/samples/bpf/sockex3_kern.c @@ -89,7 +89,6 @@ static inline __u32 ipv6_addr_hash(struct __sk_buff *ctx, __u64 off) struct globals { struct flow_keys flow; - __u32 nhoff; }; struct bpf_map_def SEC("maps") percpu_map = { @@ -139,7 +138,7 @@ static void update_stats(struct __sk_buff *skb, struct globals *g) static __always_inline void parse_ip_proto(struct __sk_buff *skb, struct globals *g, __u32 ip_proto) { - __u32 nhoff = g->nhoff; + __u32 nhoff = skb->cb[0]; int poff; switch (ip_proto) { @@ -165,7 +164,7 @@ static __always_inline void parse_ip_proto(struct __sk_buff *skb, if (gre_flags & GRE_SEQ) nhoff += 4; - g->nhoff = nhoff; + skb->cb[0] = nhoff; parse_eth_proto(skb, gre_proto); break; } @@ -195,7 +194,7 @@ PROG(PARSE_IP)(struct __sk_buff *skb) if (!g) return 0; - nhoff = g->nhoff; + nhoff = skb->cb[0]; if (unlikely(ip_is_fragment(skb, nhoff))) return 0; @@ -210,7 +209,7 @@ PROG(PARSE_IP)(struct __sk_buff *skb) verlen = load_byte(skb, nhoff + 0/*offsetof(struct iphdr, ihl)*/); nhoff += (verlen & 0xF) << 2; - g->nhoff = nhoff; + skb->cb[0] = nhoff; parse_ip_proto(skb, g, ip_proto); return 0; } @@ -223,7 +222,7 @@ PROG(PARSE_IPV6)(struct __sk_buff *skb) if (!g) return 0; - nhoff = g->nhoff; + nhoff = skb->cb[0]; ip_proto = load_byte(skb, nhoff + offsetof(struct ipv6hdr, nexthdr)); @@ -233,25 +232,21 @@ PROG(PARSE_IPV6)(struct __sk_buff *skb) nhoff + offsetof(struct ipv6hdr, daddr)); nhoff += sizeof(struct ipv6hdr); - g->nhoff = nhoff; + skb->cb[0] = nhoff; parse_ip_proto(skb, g, ip_proto); return 0; } PROG(PARSE_VLAN)(struct __sk_buff *skb) { - struct globals *g = this_cpu_globals(); __u32 nhoff, proto; - if (!g) - return 0; - - nhoff = g->nhoff; + nhoff = skb->cb[0]; proto = load_half(skb, nhoff + offsetof(struct vlan_hdr, 
h_vlan_encapsulated_proto)); nhoff += sizeof(struct vlan_hdr); - g->nhoff = nhoff; + skb->cb[0] = nhoff; parse_eth_proto(skb, proto); @@ -260,17 +255,13 @@ PROG(PARSE_VLAN)(struct __sk_buff *skb) PROG(PARSE_MPLS)(struct __sk_buff *skb) { - struct globals *g = this_cpu_globals(); __u32 nhoff, label; - if (!g) - return 0; - - nhoff = g->nhoff; + nhoff = skb->cb[0]; label = load_word(skb, nhoff); nhoff += sizeof(struct mpls_label); - g->nhoff = nhoff; + skb->cb[0] = nhoff; if (label & MPLS_LS_S_MASK) { __u8 verlen = load_byte(skb, nhoff); @@ -288,14 +279,10 @@ PROG(PARSE_MPLS)(struct __sk_buff *skb) SEC("socket/0") int main_prog(struct __sk_buff *skb) { - struct globals *g = this_cpu_globals(); __u32 nhoff = ETH_HLEN; __u32 proto = load_half(skb, 12); - if (!g) - return 0; - - g->nhoff = nhoff; + skb->cb[0] = nhoff; parse_eth_proto(skb, proto); return 0; } diff --git a/samples/bpf/test_verifier.c b/samples/bpf/test_verifier.c index 12f3780af73f..693605997abc 100644 --- a/samples/bpf/test_verifier.c +++ b/samples/bpf/test_verifier.c @@ -29,6 +29,7 @@ struct bpf_test { ACCEPT, REJECT } result; + enum bpf_prog_type prog_type; }; static struct bpf_test tests[] = { @@ -743,6 +744,84 @@ static struct bpf_test tests[] = { .errstr = "different pointers", .result = REJECT, }, + { + "check skb->mark is not writeable by sockets", + .insns = { + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_1, + offsetof(struct __sk_buff, mark)), + BPF_EXIT_INSN(), + }, + .errstr = "invalid bpf_context access", + .result = REJECT, + }, + { + "check skb->tc_index is not writeable by sockets", + .insns = { + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_1, + offsetof(struct __sk_buff, tc_index)), + BPF_EXIT_INSN(), + }, + .errstr = "invalid bpf_context access", + .result = REJECT, + }, + { + "check non-u32 access to cb", + .insns = { + BPF_STX_MEM(BPF_H, BPF_REG_1, BPF_REG_1, + offsetof(struct __sk_buff, cb[0])), + BPF_EXIT_INSN(), + }, + .errstr = "invalid bpf_context access", + .result = REJECT, + }, + { + "check out of range skb->cb access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, cb[60])), + BPF_EXIT_INSN(), + }, + .errstr = "invalid bpf_context access", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_SCHED_ACT, + }, + { + "write skb fields from socket prog", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, cb[4])), + BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 1), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, mark)), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, tc_index)), + BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 1), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_1, + offsetof(struct __sk_buff, cb[0])), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_1, + offsetof(struct __sk_buff, cb[2])), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + }, + { + "write skb fields from tc_cls_act prog", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, cb[0])), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, mark)), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, tc_index)), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, tc_index)), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, cb[3])), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, }; static int probe_filter_length(struct bpf_insn *fp) @@ -775,6 +854,7 @@ static int test(void) for (i = 0; i < ARRAY_SIZE(tests); i++) { 
struct bpf_insn *prog = tests[i].insns; + int prog_type = tests[i].prog_type; int prog_len = probe_filter_length(prog); int *fixup = tests[i].fixup; int map_fd = -1; @@ -789,8 +869,8 @@ static int test(void) } printf("#%d %s ", i, tests[i].descr); - prog_fd = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, prog, - prog_len * sizeof(struct bpf_insn), + prog_fd = bpf_prog_load(prog_type ?: BPF_PROG_TYPE_SOCKET_FILTER, + prog, prog_len * sizeof(struct bpf_insn), "GPL", 0); if (tests[i].result == ACCEPT) { -- cgit v1.2.3 From c9fdfa14c3792c0160849c484e83aa57afd80ccc Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Thu, 14 May 2015 23:09:58 +0200 Subject: perf: add new PERF_SAMPLE_BRANCH_IND_JUMP branch sample type This patch adds a new branch_sample_type flag to enable filtering branch sampling to indirect jumps. The support is subject to hardware or kernel software support on each architecture. Filtering on indirect jump is useful to study the targets of the jump. Signed-off-by: Stephane Eranian Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Andi Kleen Cc: Andrew Morton Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: acme@redhat.com Cc: dsahern@gmail.com Cc: jolsa@redhat.com Cc: kan.liang@intel.com Cc: namhyung@kernel.org Link: http://lkml.kernel.org/r/1431637800-31061-2-git-send-email-eranian@google.com Signed-off-by: Ingo Molnar --- include/uapi/linux/perf_event.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 309211b3eb67..c4622f1ce046 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -167,6 +167,7 @@ enum perf_branch_sample_type_shift { PERF_SAMPLE_BRANCH_COND_SHIFT = 10, /* conditional branches */ PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT = 11, /* call/ret stack */ + PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT = 12, /* indirect jumps */ PERF_SAMPLE_BRANCH_MAX_SHIFT /* non-ABI */ }; @@ -186,6 +187,7 @@ enum perf_branch_sample_type { PERF_SAMPLE_BRANCH_COND = 1U << PERF_SAMPLE_BRANCH_COND_SHIFT, PERF_SAMPLE_BRANCH_CALL_STACK = 1U << PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT, + PERF_SAMPLE_BRANCH_IND_JUMP = 1U << PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT, PERF_SAMPLE_BRANCH_MAX = 1U << PERF_SAMPLE_BRANCH_MAX_SHIFT, }; -- cgit v1.2.3 From f38b0dbb491a6987e198aa6b428db8692a6480f8 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Sun, 10 May 2015 15:13:14 -0400 Subject: perf/x86/intel: Introduce PERF_RECORD_LOST_SAMPLES After enlarging the PEBS interrupt threshold, there may be some mixed up PEBS samples which are discarded by the kernel. This patch makes the kernel emit a PERF_RECORD_LOST_SAMPLES record with the number of possible discarded records when it is impossible to demux the samples. It makes sure the user is not left in the dark about such discards. Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: acme@infradead.org Cc: eranian@google.com Link: http://lkml.kernel.org/r/1431285195-14269-8-git-send-email-kan.liang@intel.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_intel_ds.c | 20 ++++++++++++++++--- include/linux/perf_event.h | 3 +++ include/uapi/linux/perf_event.h | 12 +++++++++++ kernel/events/core.c | 33 +++++++++++++++++++++++++++++++ 4 files changed, 65 insertions(+), 3 deletions(-) (limited to 'include/uapi/linux') diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index 266079a3a646..34d0c4816141 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c @@ -1126,6 +1126,7 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs) void *base, *at, *top; int bit; short counts[MAX_PEBS_EVENTS] = {}; + short error[MAX_PEBS_EVENTS] = {}; if (!x86_pmu.pebs_active) return; @@ -1169,20 +1170,33 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs) /* slow path */ pebs_status = p->status & cpuc->pebs_enabled; pebs_status &= (1ULL << MAX_PEBS_EVENTS) - 1; - if (pebs_status != (1 << bit)) + if (pebs_status != (1 << bit)) { + u8 i; + + for_each_set_bit(i, (unsigned long *)&pebs_status, + MAX_PEBS_EVENTS) + error[i]++; continue; + } } counts[bit]++; } for (bit = 0; bit < x86_pmu.max_pebs_events; bit++) { - if (counts[bit] == 0) + if ((counts[bit] == 0) && (error[bit] == 0)) continue; event = cpuc->events[bit]; WARN_ON_ONCE(!event); WARN_ON_ONCE(!event->attr.precise_ip); - __intel_pmu_pebs_event(event, iregs, base, top, bit, counts[bit]); + /* log dropped samples number */ + if (error[bit]) + perf_log_lost_samples(event, error[bit]); + + if (counts[bit]) { + __intel_pmu_pebs_event(event, iregs, base, + top, bit, counts[bit]); + } } } diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 5f192e1bc98e..a204d5266f5f 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -743,6 +743,9 @@ perf_event__output_id_sample(struct perf_event *event, struct perf_output_handle *handle, struct perf_sample_data *sample); +extern void +perf_log_lost_samples(struct perf_event *event, u64 lost); + static inline bool is_sampling_event(struct perf_event *event) { return event->attr.sample_period != 0; diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index c4622f1ce046..613ed9ad588f 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -802,6 +802,18 @@ enum perf_event_type { */ PERF_RECORD_ITRACE_START = 12, + /* + * Records the dropped/lost sample number. 
+ * + * struct { + * struct perf_event_header header; + * + * u64 lost; + * struct sample_id sample_id; + * }; + */ + PERF_RECORD_LOST_SAMPLES = 13, + PERF_RECORD_MAX, /* non-ABI */ }; diff --git a/kernel/events/core.c b/kernel/events/core.c index e499b4e43aff..9e0773d5d110 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -5974,6 +5974,39 @@ void perf_event_aux_event(struct perf_event *event, unsigned long head, perf_output_end(&handle); } +/* + * Lost/dropped samples logging + */ +void perf_log_lost_samples(struct perf_event *event, u64 lost) +{ + struct perf_output_handle handle; + struct perf_sample_data sample; + int ret; + + struct { + struct perf_event_header header; + u64 lost; + } lost_samples_event = { + .header = { + .type = PERF_RECORD_LOST_SAMPLES, + .misc = 0, + .size = sizeof(lost_samples_event), + }, + .lost = lost, + }; + + perf_event_header__init_id(&lost_samples_event.header, &sample, event); + + ret = perf_output_begin(&handle, event, + lost_samples_event.header.size); + if (ret) + return; + + perf_output_put(&handle, lost_samples_event); + perf_event__output_id_sample(event, &handle, &sample); + perf_output_end(&handle); +} + /* * IRQ throttle logging */ -- cgit v1.2.3 From 9e58095f9660f88d6a2febe87d5073a6b2e9c399 Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Tue, 14 Oct 2014 02:19:46 +0200 Subject: NFC: netlink: Implement vendor command support Vendor commands are passed from userspace through the NFC_CMD_VENDOR netlink command, allowing driver and hardware specific operations implementations like for example RF tuning or production line calibration. Drivers will associate a set of vendor commands to a vendor id, which could typically be an OUI. The netlink kernel implementation will try to match the received vendor id and sub command attributes with the registered ones. When such match is found, the driver defined sub command routine is called. Signed-off-by: Samuel Ortiz --- include/uapi/linux/nfc.h | 10 +++++++++ net/nfc/netlink.c | 55 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 65 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nfc.h b/include/uapi/linux/nfc.h index c1e2e63cf9b5..dd3f75389076 100644 --- a/include/uapi/linux/nfc.h +++ b/include/uapi/linux/nfc.h @@ -86,6 +86,8 @@ * for this event is the application ID (AID). * @NFC_CMD_GET_SE: Dump all discovered secure elements from an NFC controller. * @NFC_CMD_SE_IO: Send/Receive APDUs to/from the selected secure element. + * @NFC_CMD_VENDOR: Vendor specific command, to be implemented directly + * from the driver in order to support hardware specific operations. 
*/ enum nfc_commands { NFC_CMD_UNSPEC, @@ -117,6 +119,7 @@ enum nfc_commands { NFC_CMD_GET_SE, NFC_CMD_SE_IO, NFC_CMD_ACTIVATE_TARGET, + NFC_CMD_VENDOR, /* private: internal use only */ __NFC_CMD_AFTER_LAST }; @@ -153,6 +156,10 @@ enum nfc_commands { * @NFC_ATTR_APDU: Secure element APDU * @NFC_ATTR_TARGET_ISO15693_DSFID: ISO 15693 Data Storage Format Identifier * @NFC_ATTR_TARGET_ISO15693_UID: ISO 15693 Unique Identifier + * @NFC_ATTR_VENDOR_ID: NFC manufacturer unique ID, typically an OUI + * @NFC_ATTR_VENDOR_SUBCMD: Vendor specific sub command + * @NFC_ATTR_VENDOR_DATA: Vendor specific data, to be optionally passed + * to a vendor specific command implementation */ enum nfc_attrs { NFC_ATTR_UNSPEC, @@ -184,6 +191,9 @@ enum nfc_attrs { NFC_ATTR_TARGET_ISO15693_DSFID, NFC_ATTR_TARGET_ISO15693_UID, NFC_ATTR_SE_PARAMS, + NFC_ATTR_VENDOR_ID, + NFC_ATTR_VENDOR_SUBCMD, + NFC_ATTR_VENDOR_DATA, /* private: internal use only */ __NFC_ATTR_AFTER_LAST }; diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c index 3763036710ae..f85f37ed19b2 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -5,6 +5,12 @@ * Lauro Ramos Venancio * Aloisio Almeida Jr * + * Vendor commands implementation based on net/wireless/nl80211.c + * which is: + * + * Copyright 2006-2010 Johannes Berg + * Copyright 2013-2014 Intel Mobile Communications GmbH + * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or @@ -1489,6 +1495,50 @@ static int nfc_genl_se_io(struct sk_buff *skb, struct genl_info *info) return nfc_se_io(dev, se_idx, apdu, apdu_len, se_io_cb, ctx); } +static int nfc_genl_vendor_cmd(struct sk_buff *skb, + struct genl_info *info) +{ + struct nfc_dev *dev; + struct nfc_vendor_cmd *cmd; + u32 dev_idx, vid, subcmd; + u8 *data; + size_t data_len; + int i; + + if (!info->attrs[NFC_ATTR_DEVICE_INDEX] || + !info->attrs[NFC_ATTR_VENDOR_ID] || + !info->attrs[NFC_ATTR_VENDOR_SUBCMD]) + return -EINVAL; + + dev_idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]); + vid = nla_get_u32(info->attrs[NFC_ATTR_VENDOR_ID]); + subcmd = nla_get_u32(info->attrs[NFC_ATTR_VENDOR_SUBCMD]); + + dev = nfc_get_device(dev_idx); + if (!dev || !dev->vendor_cmds || !dev->n_vendor_cmds) + return -ENODEV; + + data = nla_data(info->attrs[NFC_ATTR_VENDOR_DATA]); + if (data) { + data_len = nla_len(info->attrs[NFC_ATTR_VENDOR_DATA]); + if (data_len == 0) + return -EINVAL; + } else { + data_len = 0; + } + + for (i = 0; i < dev->n_vendor_cmds; i++) { + cmd = &dev->vendor_cmds[i]; + + if (cmd->vendor_id != vid || cmd->subcmd != subcmd) + continue; + + return cmd->doit(dev, data, data_len); + } + + return -EOPNOTSUPP; +} + static const struct genl_ops nfc_genl_ops[] = { { .cmd = NFC_CMD_GET_DEVICE, @@ -1579,6 +1629,11 @@ static const struct genl_ops nfc_genl_ops[] = { .doit = nfc_genl_activate_target, .policy = nfc_genl_policy, }, + { + .cmd = NFC_CMD_VENDOR, + .doit = nfc_genl_vendor_cmd, + .policy = nfc_genl_policy, + }, }; -- cgit v1.2.3 From dd895d7f21b244e7fd4c7477697e274de7e44ecb Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Tue, 9 Jun 2015 08:05:10 +0200 Subject: can: cangw: introduce optional uid to reference created routing jobs Similar to referencing iptables rules by their line number this UID allows to reference created routing jobs, e.g. to alter configured data modifications. The UID is an optional non-zero value which can be provided at routing job creation time. 
When the UID is set the UID replaces the data modification configuration as job identification attribute e.g. at job removal time. Signed-off-by: Oliver Hartkopp Signed-off-by: Marc Kleine-Budde --- include/uapi/linux/can/gw.h | 5 ++++ net/can/gw.c | 68 +++++++++++++++++++++++++++++++++++++-------- 2 files changed, 61 insertions(+), 12 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/can/gw.h b/include/uapi/linux/can/gw.h index 3e6184cf2f6d..5079b9d57e31 100644 --- a/include/uapi/linux/can/gw.h +++ b/include/uapi/linux/can/gw.h @@ -78,6 +78,7 @@ enum { CGW_FILTER, /* specify struct can_filter on source CAN device */ CGW_DELETED, /* number of deleted CAN frames (see max_hops param) */ CGW_LIM_HOPS, /* limit the number of hops of this specific rule */ + CGW_MOD_UID, /* user defined identifier for modification updates */ __CGW_MAX }; @@ -162,6 +163,10 @@ enum { * load time of the can-gw module). This value is used to reduce the number of * possible hops for this gateway rule to a value smaller then max_hops. * + * CGW_MOD_UID (length 4 bytes): + * Optional non-zero user defined routing job identifier to alter existing + * modification settings at runtime. + * * CGW_CS_XOR (length 4 bytes): * Set a simple XOR checksum starting with an initial value into * data[result-idx] using data[start-idx] .. data[end-idx] diff --git a/net/can/gw.c b/net/can/gw.c index a6f448e18ea8..455168718c2e 100644 --- a/net/can/gw.c +++ b/net/can/gw.c @@ -110,6 +110,7 @@ struct cf_mod { void (*xor)(struct can_frame *cf, struct cgw_csum_xor *xor); void (*crc8)(struct can_frame *cf, struct cgw_csum_crc8 *crc8); } csumfunc; + u32 uid; }; @@ -548,6 +549,11 @@ static int cgw_put_job(struct sk_buff *skb, struct cgw_job *gwj, int type, goto cancel; } + if (gwj->mod.uid) { + if (nla_put_u32(skb, CGW_MOD_UID, gwj->mod.uid) < 0) + goto cancel; + } + if (gwj->mod.csumfunc.crc8) { if (nla_put(skb, CGW_CS_CRC8, CGW_CS_CRC8_LEN, &gwj->mod.csum.crc8) < 0) @@ -619,6 +625,7 @@ static const struct nla_policy cgw_policy[CGW_MAX+1] = { [CGW_DST_IF] = { .type = NLA_U32 }, [CGW_FILTER] = { .len = sizeof(struct can_filter) }, [CGW_LIM_HOPS] = { .type = NLA_U8 }, + [CGW_MOD_UID] = { .type = NLA_U32 }, }; /* check for common and gwtype specific attributes */ @@ -761,6 +768,10 @@ static int cgw_parse_attr(struct nlmsghdr *nlh, struct cf_mod *mod, else mod->csumfunc.xor = cgw_csum_xor_neg; } + + if (tb[CGW_MOD_UID]) { + nla_memcpy(&mod->uid, tb[CGW_MOD_UID], sizeof(u32)); + } } if (gwtype == CGW_TYPE_CAN_CAN) { @@ -802,6 +813,8 @@ static int cgw_create_job(struct sk_buff *skb, struct nlmsghdr *nlh) { struct rtcanmsg *r; struct cgw_job *gwj; + struct cf_mod mod; + struct can_can_gw ccgw; u8 limhops = 0; int err = 0; @@ -819,6 +832,36 @@ static int cgw_create_job(struct sk_buff *skb, struct nlmsghdr *nlh) if (r->gwtype != CGW_TYPE_CAN_CAN) return -EINVAL; + err = cgw_parse_attr(nlh, &mod, CGW_TYPE_CAN_CAN, &ccgw, &limhops); + if (err < 0) + return err; + + if (mod.uid) { + + ASSERT_RTNL(); + + /* check for updating an existing job with identical uid */ + hlist_for_each_entry(gwj, &cgw_list, list) { + + if (gwj->mod.uid != mod.uid) + continue; + + /* interfaces & filters must be identical */ + if (memcmp(&gwj->ccgw, &ccgw, sizeof(ccgw))) + return -EINVAL; + + /* update modifications with disabled softirq & quit */ + local_bh_disable(); + memcpy(&gwj->mod, &mod, sizeof(mod)); + local_bh_enable(); + return 0; + } + } + + /* ifindex == 0 is not allowed for job creation */ + if (!ccgw.src_idx || !ccgw.dst_idx) + return 
-ENODEV; + gwj = kmem_cache_alloc(cgw_cache, GFP_KERNEL); if (!gwj) return -ENOMEM; @@ -828,18 +871,14 @@ static int cgw_create_job(struct sk_buff *skb, struct nlmsghdr *nlh) gwj->deleted_frames = 0; gwj->flags = r->flags; gwj->gwtype = r->gwtype; + gwj->limit_hops = limhops; - err = cgw_parse_attr(nlh, &gwj->mod, CGW_TYPE_CAN_CAN, &gwj->ccgw, - &limhops); - if (err < 0) - goto out; + /* insert already parsed information */ + memcpy(&gwj->mod, &mod, sizeof(mod)); + memcpy(&gwj->ccgw, &ccgw, sizeof(ccgw)); err = -ENODEV; - /* ifindex == 0 is not allowed for job creation */ - if (!gwj->ccgw.src_idx || !gwj->ccgw.dst_idx) - goto out; - gwj->src.dev = __dev_get_by_index(&init_net, gwj->ccgw.src_idx); if (!gwj->src.dev) @@ -856,8 +895,6 @@ static int cgw_create_job(struct sk_buff *skb, struct nlmsghdr *nlh) if (gwj->dst.dev->type != ARPHRD_CAN) goto out; - gwj->limit_hops = limhops; - ASSERT_RTNL(); err = cgw_register_filter(gwj); @@ -931,8 +968,15 @@ static int cgw_remove_job(struct sk_buff *skb, struct nlmsghdr *nlh) if (gwj->limit_hops != limhops) continue; - if (memcmp(&gwj->mod, &mod, sizeof(mod))) - continue; + /* we have a match when uid is enabled and identical */ + if (gwj->mod.uid || mod.uid) { + if (gwj->mod.uid != mod.uid) + continue; + } else { + /* no uid => check for identical modifications */ + if (memcmp(&gwj->mod, &mod, sizeof(mod))) + continue; + } /* if (r->gwtype == CGW_TYPE_CAN_CAN) - is made sure here */ if (memcmp(&gwj->ccgw, &ccgw, sizeof(ccgw))) -- cgit v1.2.3 From 448bac10318977e2c41548d7e6a1d87f9d48784d Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Sun, 7 Jun 2015 11:18:16 -0300 Subject: [media] DocBook: document DVB-S2 pilot in a table Putting it into a table allows commenting on each possible value, which makes it clearer what the field means. It also allows cross-referencing with frontend.h. Signed-off-by: Mauro Carvalho Chehab --- Documentation/DocBook/media/dvb/dvbproperty.xml | 31 +++++++++++++++++++------ include/uapi/linux/dvb/frontend.h | 6 +++-- 2 files changed, 28 insertions(+), 9 deletions(-) (limited to 'include/uapi/linux') diff --git a/Documentation/DocBook/media/dvb/dvbproperty.xml b/Documentation/DocBook/media/dvb/dvbproperty.xml index 8d57f0c9b6aa..e31d9457671f 100644 --- a/Documentation/DocBook/media/dvb/dvbproperty.xml +++ b/Documentation/DocBook/media/dvb/dvbproperty.xml @@ -417,13 +417,30 @@ get/set up to 64 properties. The actual meaning of each property is described on Sets DVB-S2 pilot
fe_pilot type - -typedef enum fe_pilot { - PILOT_ON, - PILOT_OFF, - PILOT_AUTO, -} fe_pilot_t; - + + enum fe_pilot + + &cs-def; + + + ID + Description + + + + + PILOT_ON + Pilot tones enabled + + PILOT_OFF + Pilot tones disabled + + PILOT_AUTO + Autodetect pilot tones + + + +
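For reference, a hedged sketch (not part of the patch) of how an application could select one of the pilot values documented above through the DVBv5 property API; the frontend path passed by the caller is an assumption and error handling is kept minimal:

#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/dvb/frontend.h>

/* Select a pilot setting on an already opened-by-path frontend device. */
int set_pilot(const char *fe_path, enum fe_pilot pilot)
{
	struct dtv_property prop = { .cmd = DTV_PILOT, .u.data = pilot };
	struct dtv_properties props = { .num = 1, .props = &prop };
	int ret, fd = open(fe_path, O_RDWR);

	if (fd < 0)
		return -1;
	ret = ioctl(fd, FE_SET_PROPERTY, &props);	/* e.g. pilot = PILOT_AUTO */
	close(fd);
	return ret;
}

A call such as set_pilot("/dev/dvb/adapter0/frontend0", PILOT_AUTO) would normally be issued together with the rest of the tuning properties.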
diff --git a/include/uapi/linux/dvb/frontend.h b/include/uapi/linux/dvb/frontend.h index 3a7ff9002654..bb222eb04627 100644 --- a/include/uapi/linux/dvb/frontend.h +++ b/include/uapi/linux/dvb/frontend.h @@ -399,11 +399,13 @@ struct dvb_frontend_event { #define DTV_MAX_COMMAND DTV_STAT_TOTAL_BLOCK_COUNT -typedef enum fe_pilot { +enum fe_pilot { PILOT_ON, PILOT_OFF, PILOT_AUTO, -} fe_pilot_t; +}; + +typedef enum fe_pilot fe_pilot_t; typedef enum fe_rolloff { ROLLOFF_35, /* Implied value in DVB-S, default for DVB-S2 */ -- cgit v1.2.3 From b35f6ba97882ef4e00c1faae1d66232f7314fe91 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Sun, 7 Jun 2015 11:59:27 -0300 Subject: [media] DocBook: better document the DVB-S2 rolloff factor Instead of using a program listing, use a table and make clearer what each define means. Signed-off-by: Mauro Carvalho Chehab --- Documentation/DocBook/media/dvb/dvbproperty.xml | 35 +++++++++++++++++++------ include/uapi/linux/dvb/frontend.h | 6 +++-- 2 files changed, 31 insertions(+), 10 deletions(-) (limited to 'include/uapi/linux') diff --git a/Documentation/DocBook/media/dvb/dvbproperty.xml b/Documentation/DocBook/media/dvb/dvbproperty.xml index e1d1e2469029..c0aa1ad9eccf 100644 --- a/Documentation/DocBook/media/dvb/dvbproperty.xml +++ b/Documentation/DocBook/media/dvb/dvbproperty.xml @@ -468,14 +468,33 @@ get/set up to 64 properties. The actual meaning of each property is described on
fe_rolloff type - -typedef enum fe_rolloff { - ROLLOFF_35, /* Implied value in DVB-S, default for DVB-S2 */ - ROLLOFF_20, - ROLLOFF_25, - ROLLOFF_AUTO, -} fe_rolloff_t; - + + enum fe_rolloff + + &cs-def; + + + ID + Description + + + + + ROLLOFF_35 + Rolloff factor: α=35% + + ROLLOFF_20 + Rolloff factor: α=20% + + ROLLOFF_25 + Rolloff factor: α=25% + + ROLLOFF_AUTO + Auto-detect the rolloff factor. + + + +
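A small hedged sketch (not part of the patch) of why the accompanying header change is source compatible: the enum gets a plain tag for kernel use while the old typedef is kept for existing userspace, so both spellings name the same type:

#include <linux/dvb/frontend.h>

static enum fe_rolloff pick_rolloff(int is_dvbs2)
{
	fe_rolloff_t legacy = ROLLOFF_35;		/* old userspace spelling still compiles */

	return is_dvbs2 ? ROLLOFF_AUTO : legacy;	/* DVB-S implies a 35% roll-off */
}

Keeping the typedef in the uapi header is what later allows the in-kernel users to be converted to the enum form without breaking applications.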
diff --git a/include/uapi/linux/dvb/frontend.h b/include/uapi/linux/dvb/frontend.h index bb222eb04627..cdd9e2fc030d 100644 --- a/include/uapi/linux/dvb/frontend.h +++ b/include/uapi/linux/dvb/frontend.h @@ -407,12 +407,14 @@ enum fe_pilot { typedef enum fe_pilot fe_pilot_t; -typedef enum fe_rolloff { +enum fe_rolloff { ROLLOFF_35, /* Implied value in DVB-S, default for DVB-S2 */ ROLLOFF_20, ROLLOFF_25, ROLLOFF_AUTO, -} fe_rolloff_t; +}; + +typedef enum fe_rolloff fe_rolloff_t; typedef enum fe_delivery_system { SYS_UNDEFINED, -- cgit v1.2.3 From d21ddba826d8704525fa637e69cf90b8e034d94a Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Sun, 7 Jun 2015 14:21:09 -0300 Subject: [media] DocBook: properly document the delivery systems Use a table for the delivery systems. The table is organized by the type (cable, satellite, terrestrial) and shows what standards are not fully implemented. Signed-off-by: Mauro Carvalho Chehab --- Documentation/DocBook/media/dvb/dvbproperty.xml | 99 +++++++++++++++++++------ include/uapi/linux/dvb/frontend.h | 6 +- 2 files changed, 79 insertions(+), 26 deletions(-) (limited to 'include/uapi/linux') diff --git a/Documentation/DocBook/media/dvb/dvbproperty.xml b/Documentation/DocBook/media/dvb/dvbproperty.xml index c0aa1ad9eccf..08227d4e9150 100644 --- a/Documentation/DocBook/media/dvb/dvbproperty.xml +++ b/Documentation/DocBook/media/dvb/dvbproperty.xml @@ -515,31 +515,82 @@ get/set up to 64 properties. The actual meaning of each property is described on
fe_delivery_system type Possible values: - -typedef enum fe_delivery_system { - SYS_UNDEFINED, - SYS_DVBC_ANNEX_A, - SYS_DVBC_ANNEX_B, - SYS_DVBT, - SYS_DSS, - SYS_DVBS, - SYS_DVBS2, - SYS_DVBH, - SYS_ISDBT, - SYS_ISDBS, - SYS_ISDBC, - SYS_ATSC, - SYS_ATSCMH, - SYS_DTMB, - SYS_CMMB, - SYS_DAB, - SYS_DVBT2, - SYS_TURBO, - SYS_DVBC_ANNEX_C, -} fe_delivery_system_t; - -
+ + enum fe_delivery_system + + &cs-def; + + + ID + Description + + + + + SYS_UNDEFINED + Undefined standard. Generally, indicates an error + + SYS_DVBC_ANNEX_A + Cable TV: DVB-C following ITU-T J.83 Annex A spec + + SYS_DVBC_ANNEX_B + Cable TV: DVB-C following ITU-T J.83 Annex B spec (ClearQAM) + + SYS_DVBC_ANNEX_C + Cable TV: DVB-C following ITU-T J.83 Annex C spec + + SYS_ISDBC + Cable TV: ISDB-C (no drivers yet) + + SYS_DVBT + Terrestrial TV: DVB-T + + SYS_DVBT2 + Terrestrial TV: DVB-T2 + + SYS_ISDBT + Terrestrial TV: ISDB-T + + SYS_ATSC + Terrestrial TV: ATSC + + SYS_ATSCMH + Terrestrial TV (mobile): ATSC-M/H + + SYS_DTMB + Terrestrial TV: DTMB + + SYS_DVBS + Satellite TV: DVB-S + + SYS_DVBS2 + Satellite TV: DVB-S2 + + SYS_TURBO + Satellite TV: DVB-S Turbo + + SYS_ISDBS + Satellite TV: ISDB-S + + SYS_DAB + Digital audio: DAB (not fully supported) + + SYS_DSS + Satellite TV: DSS (not fully supported) + + SYS_CMMB + Terrestrial TV (mobile): CMMB (not fully supported) + + SYS_DVBH + Terrestrial TV (mobile): DVB-H (standard deprecated) + + + +
+ + +
<constant>DTV_ISDBT_PARTIAL_RECEPTION</constant> diff --git a/include/uapi/linux/dvb/frontend.h b/include/uapi/linux/dvb/frontend.h index cdd9e2fc030d..66499f238204 100644 --- a/include/uapi/linux/dvb/frontend.h +++ b/include/uapi/linux/dvb/frontend.h @@ -416,7 +416,7 @@ enum fe_rolloff { typedef enum fe_rolloff fe_rolloff_t; -typedef enum fe_delivery_system { +enum fe_delivery_system { SYS_UNDEFINED, SYS_DVBC_ANNEX_A, SYS_DVBC_ANNEX_B, @@ -436,7 +436,9 @@ typedef enum fe_delivery_system { SYS_DVBT2, SYS_TURBO, SYS_DVBC_ANNEX_C, -} fe_delivery_system_t; +}; + +typedef enum fe_delivery_system fe_delivery_system_t; /* backward compatibility */ #define SYS_DVBC_ANNEX_AC SYS_DVBC_ANNEX_A -- cgit v1.2.3 From fe557e40f576741308d3546906eba7094e940de4 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Sun, 7 Jun 2015 14:31:13 -0300 Subject: [media] DocBook: add xrefs for enum fe_type The only enum that was missing xrefs at frontend.h is fe_type. Add xrefs for them. Signed-off-by: Mauro Carvalho Chehab --- Documentation/DocBook/media/dvb/frontend_legacy_api.xml | 8 ++++---- include/uapi/linux/dvb/frontend.h | 6 ++++-- 2 files changed, 8 insertions(+), 6 deletions(-) (limited to 'include/uapi/linux') diff --git a/Documentation/DocBook/media/dvb/frontend_legacy_api.xml b/Documentation/DocBook/media/dvb/frontend_legacy_api.xml index 8523caf91a2c..8fadf3a4ba44 100644 --- a/Documentation/DocBook/media/dvb/frontend_legacy_api.xml +++ b/Documentation/DocBook/media/dvb/frontend_legacy_api.xml @@ -20,22 +20,22 @@ - FE_QPSK + FE_QPSK For DVB-S standard SYS_DVBS - FE_QAM + FE_QAM For DVB-C annex A standard SYS_DVBC_ANNEX_A - FE_OFDM + FE_OFDM For DVB-T standard SYS_DVBT - FE_ATSC + FE_ATSC For ATSC standard (terrestrial) or for DVB-C Annex B (cable) used in US. SYS_ATSC (terrestrial) or SYS_DVBC_ANNEX_B (cable) diff --git a/include/uapi/linux/dvb/frontend.h b/include/uapi/linux/dvb/frontend.h index 66499f238204..a36d802fae0c 100644 --- a/include/uapi/linux/dvb/frontend.h +++ b/include/uapi/linux/dvb/frontend.h @@ -28,12 +28,14 @@ #include -typedef enum fe_type { +enum fe_type { FE_QPSK, FE_QAM, FE_OFDM, FE_ATSC -} fe_type_t; +}; + +typedef enum fe_type fe_type_t; enum fe_caps { -- cgit v1.2.3 From 0df289a209e02f0926042ab07d7d2595ea2d2e9b Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Sun, 7 Jun 2015 14:53:52 -0300 Subject: [media] dvb: Get rid of typedev usage for enums The DVB API was originally defined using typedefs. This is against Kernel CodingStyle, and there's no good usage here. While we can't remove its usage on userspace, we can avoid its usage in Kernelspace. So, let's do it. This patch was generated by this shell script: for j in $(grep typedef include/uapi/linux/dvb/frontend.h |cut -d' ' -f 3); do for i in $(find drivers/media -name '*.[ch]' -type f) $(find drivers/staging/media -name '*.[ch]' -type f); do sed "s,${j}_t,enum $j," <$i >a && mv a $i; done; done While here, make CodingStyle fixes on the affected lines. 
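Because the script only converts in-kernel spellings and the uapi header keeps the backward-compatibility typedefs, userspace is unaffected; a hedged sketch (not from the patch) of the unchanged userspace view, with the frontend path being an assumption:

#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/dvb/frontend.h>

/* Poll the lock status; either the enum or the legacy typedef spelling works. */
int frontend_has_lock(const char *fe_path)
{
	enum fe_status status = 0;	/* fe_status_t would compile just as well */
	int ret, fd = open(fe_path, O_RDONLY | O_NONBLOCK);

	if (fd < 0)
		return -1;
	ret = ioctl(fd, FE_READ_STATUS, &status);
	close(fd);
	return ret < 0 ? -1 : !!(status & FE_HAS_LOCK);
}
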
Signed-off-by: Mauro Carvalho Chehab Acked-by: Stefan Richter # for drivers/media/firewire/* --- drivers/media/common/b2c2/flexcop-fe-tuner.c | 7 ++-- drivers/media/common/siano/smsdvb-main.c | 6 +-- drivers/media/common/siano/smsdvb.h | 2 +- drivers/media/dvb-core/dvb_frontend.c | 27 +++++++----- drivers/media/dvb-core/dvb_frontend.h | 42 ++++++++++--------- drivers/media/dvb-frontends/a8293.c | 2 +- drivers/media/dvb-frontends/af9013.c | 4 +- drivers/media/dvb-frontends/af9033.c | 4 +- drivers/media/dvb-frontends/as102_fe.c | 4 +- drivers/media/dvb-frontends/atbm8830.c | 3 +- drivers/media/dvb-frontends/au8522_dig.c | 4 +- drivers/media/dvb-frontends/au8522_priv.h | 2 +- drivers/media/dvb-frontends/bcm3510.c | 2 +- drivers/media/dvb-frontends/cx22700.c | 9 ++-- drivers/media/dvb-frontends/cx22702.c | 2 +- drivers/media/dvb-frontends/cx24110.c | 19 +++++---- drivers/media/dvb-frontends/cx24116.c | 38 +++++++++-------- drivers/media/dvb-frontends/cx24117.c | 40 +++++++++--------- drivers/media/dvb-frontends/cx24120.c | 50 +++++++++++------------ drivers/media/dvb-frontends/cx24123.c | 18 ++++---- drivers/media/dvb-frontends/cxd2820r_c.c | 2 +- drivers/media/dvb-frontends/cxd2820r_core.c | 5 ++- drivers/media/dvb-frontends/cxd2820r_priv.h | 8 ++-- drivers/media/dvb-frontends/cxd2820r_t.c | 2 +- drivers/media/dvb-frontends/cxd2820r_t2.c | 2 +- drivers/media/dvb-frontends/dib3000mb.c | 7 ++-- drivers/media/dvb-frontends/dib3000mc.c | 2 +- drivers/media/dvb-frontends/dib7000m.c | 2 +- drivers/media/dvb-frontends/dib7000p.c | 6 +-- drivers/media/dvb-frontends/dib8000.c | 10 ++--- drivers/media/dvb-frontends/dib9000.c | 4 +- drivers/media/dvb-frontends/drx39xyj/drxj.c | 2 +- drivers/media/dvb-frontends/drxd_hard.c | 2 +- drivers/media/dvb-frontends/drxk_hard.c | 2 +- drivers/media/dvb-frontends/drxk_hard.h | 2 +- drivers/media/dvb-frontends/ds3000.c | 13 +++--- drivers/media/dvb-frontends/dvb_dummy_fe.c | 9 ++-- drivers/media/dvb-frontends/ec100.c | 2 +- drivers/media/dvb-frontends/hd29l2.c | 2 +- drivers/media/dvb-frontends/hd29l2_priv.h | 2 +- drivers/media/dvb-frontends/isl6405.c | 3 +- drivers/media/dvb-frontends/isl6421.c | 6 ++- drivers/media/dvb-frontends/l64781.c | 2 +- drivers/media/dvb-frontends/lg2160.c | 2 +- drivers/media/dvb-frontends/lgdt3305.c | 4 +- drivers/media/dvb-frontends/lgdt3306a.c | 9 ++-- drivers/media/dvb-frontends/lgdt330x.c | 8 ++-- drivers/media/dvb-frontends/lgs8gl5.c | 2 +- drivers/media/dvb-frontends/lgs8gxx.c | 3 +- drivers/media/dvb-frontends/lnbp21.c | 4 +- drivers/media/dvb-frontends/lnbp22.c | 3 +- drivers/media/dvb-frontends/m88ds3103.c | 9 ++-- drivers/media/dvb-frontends/m88ds3103_priv.h | 4 +- drivers/media/dvb-frontends/m88rs2000.c | 19 +++++---- drivers/media/dvb-frontends/mb86a16.c | 7 ++-- drivers/media/dvb-frontends/mb86a16.h | 3 +- drivers/media/dvb-frontends/mb86a20s.c | 6 +-- drivers/media/dvb-frontends/mt312.c | 17 ++++---- drivers/media/dvb-frontends/mt352.c | 2 +- drivers/media/dvb-frontends/nxt200x.c | 2 +- drivers/media/dvb-frontends/nxt6000.c | 12 ++++-- drivers/media/dvb-frontends/or51132.c | 6 +-- drivers/media/dvb-frontends/or51211.c | 2 +- drivers/media/dvb-frontends/rtl2830.c | 2 +- drivers/media/dvb-frontends/rtl2830_priv.h | 2 +- drivers/media/dvb-frontends/rtl2832.c | 2 +- drivers/media/dvb-frontends/rtl2832_priv.h | 2 +- drivers/media/dvb-frontends/s5h1409.c | 6 +-- drivers/media/dvb-frontends/s5h1411.c | 6 +-- drivers/media/dvb-frontends/s5h1420.c | 23 +++++++---- drivers/media/dvb-frontends/s5h1432.c | 4 +- 
drivers/media/dvb-frontends/s921.c | 6 +-- drivers/media/dvb-frontends/si2165.c | 2 +- drivers/media/dvb-frontends/si2168.c | 2 +- drivers/media/dvb-frontends/si2168_priv.h | 4 +- drivers/media/dvb-frontends/si21xx.c | 10 ++--- drivers/media/dvb-frontends/sp8870.c | 3 +- drivers/media/dvb-frontends/sp887x.c | 2 +- drivers/media/dvb-frontends/stb0899_drv.c | 8 ++-- drivers/media/dvb-frontends/stv0288.c | 11 ++--- drivers/media/dvb-frontends/stv0297.c | 11 +++-- drivers/media/dvb-frontends/stv0299.c | 22 ++++++---- drivers/media/dvb-frontends/stv0367.c | 12 +++--- drivers/media/dvb-frontends/stv0367_priv.h | 2 +- drivers/media/dvb-frontends/stv0900_core.c | 6 ++- drivers/media/dvb-frontends/stv090x.c | 5 ++- drivers/media/dvb-frontends/stv6110.c | 2 +- drivers/media/dvb-frontends/tc90522.c | 17 ++++---- drivers/media/dvb-frontends/tda10021.c | 7 ++-- drivers/media/dvb-frontends/tda10023.c | 3 +- drivers/media/dvb-frontends/tda10048.c | 2 +- drivers/media/dvb-frontends/tda1004x.c | 3 +- drivers/media/dvb-frontends/tda10071.c | 10 ++--- drivers/media/dvb-frontends/tda10071_priv.h | 10 ++--- drivers/media/dvb-frontends/tda10086.c | 9 ++-- drivers/media/dvb-frontends/tda8083.c | 38 ++++++++++------- drivers/media/dvb-frontends/ves1820.c | 6 ++- drivers/media/dvb-frontends/ves1x93.c | 15 ++++--- drivers/media/dvb-frontends/zl10353.c | 2 +- drivers/media/firewire/firedtv-fe.c | 8 ++-- drivers/media/firewire/firedtv.h | 4 +- drivers/media/pci/bt8xx/dst.c | 25 +++++++----- drivers/media/pci/bt8xx/dst_common.h | 12 +++--- drivers/media/pci/cx23885/cx23885-dvb.c | 10 +++-- drivers/media/pci/cx23885/cx23885-f300.c | 2 +- drivers/media/pci/cx23885/cx23885-f300.h | 2 +- drivers/media/pci/cx23885/cx23885.h | 2 +- drivers/media/pci/cx88/cx88-dvb.c | 12 +++--- drivers/media/pci/cx88/cx88.h | 5 ++- drivers/media/pci/dm1105/dm1105.c | 3 +- drivers/media/pci/mantis/mantis_vp1034.c | 2 +- drivers/media/pci/mantis/mantis_vp1034.h | 3 +- drivers/media/pci/ngene/ngene.h | 2 +- drivers/media/pci/pt1/pt1.c | 6 +-- drivers/media/pci/pt1/va1j5jf8007s.c | 4 +- drivers/media/pci/pt1/va1j5jf8007t.c | 4 +- drivers/media/pci/pt3/pt3.c | 2 +- drivers/media/pci/saa7134/saa7134-dvb.c | 6 ++- drivers/media/pci/saa7134/saa7134.h | 3 +- drivers/media/pci/ttpci/av7110.c | 18 ++++---- drivers/media/pci/ttpci/av7110.h | 27 +++++++----- drivers/media/pci/ttpci/budget-core.c | 3 +- drivers/media/pci/ttpci/budget-patch.c | 15 ++++--- drivers/media/pci/ttpci/budget.c | 12 ++++-- drivers/media/pci/ttpci/budget.h | 2 +- drivers/media/usb/dvb-usb-v2/af9015.c | 2 +- drivers/media/usb/dvb-usb-v2/af9015.h | 2 +- drivers/media/usb/dvb-usb-v2/dvbsky.c | 11 ++--- drivers/media/usb/dvb-usb-v2/lmedm04.c | 10 ++--- drivers/media/usb/dvb-usb-v2/mxl111sf-demod.c | 14 +++---- drivers/media/usb/dvb-usb/af9005-fe.c | 5 ++- drivers/media/usb/dvb-usb/az6027.c | 3 +- drivers/media/usb/dvb-usb/cinergyT2-fe.c | 2 +- drivers/media/usb/dvb-usb/dib0700.h | 2 +- drivers/media/usb/dvb-usb/dib0700_devices.c | 2 +- drivers/media/usb/dvb-usb/dtt200u-fe.c | 7 ++-- drivers/media/usb/dvb-usb/dw2102.c | 13 +++--- drivers/media/usb/dvb-usb/friio-fe.c | 3 +- drivers/media/usb/dvb-usb/gp8psk-fe.c | 13 +++--- drivers/media/usb/dvb-usb/opera1.c | 3 +- drivers/media/usb/dvb-usb/technisat-usb2.c | 2 +- drivers/media/usb/dvb-usb/vp702x-fe.c | 17 ++++---- drivers/media/usb/dvb-usb/vp7045-fe.c | 3 +- drivers/media/usb/ttusb-budget/dvb-ttusb-budget.c | 9 ++-- drivers/media/usb/ttusb-dec/ttusbdecfe.c | 10 +++-- drivers/staging/media/mn88472/mn88472.c | 2 +- 
drivers/staging/media/mn88472/mn88472_priv.h | 2 +- drivers/staging/media/mn88473/mn88473.c | 2 +- drivers/staging/media/mn88473/mn88473_priv.h | 2 +- include/uapi/linux/dvb/frontend.h | 4 +- 150 files changed, 630 insertions(+), 492 deletions(-) (limited to 'include/uapi/linux') diff --git a/drivers/media/common/b2c2/flexcop-fe-tuner.c b/drivers/media/common/b2c2/flexcop-fe-tuner.c index 2426062fcb3c..5e5696729eca 100644 --- a/drivers/media/common/b2c2/flexcop-fe-tuner.c +++ b/drivers/media/common/b2c2/flexcop-fe-tuner.c @@ -39,7 +39,8 @@ static int flexcop_fe_request_firmware(struct dvb_frontend *fe, /* lnb control */ #if FE_SUPPORTED(MT312) || FE_SUPPORTED(STV0299) -static int flexcop_set_voltage(struct dvb_frontend *fe, fe_sec_voltage_t voltage) +static int flexcop_set_voltage(struct dvb_frontend *fe, + enum fe_sec_voltage voltage) { struct flexcop_device *fc = fe->dvb->priv; flexcop_ibi_value v; @@ -78,7 +79,7 @@ static int flexcop_sleep(struct dvb_frontend* fe) /* SkyStar2 DVB-S rev 2.3 */ #if FE_SUPPORTED(MT312) && FE_SUPPORTED(PLL) -static int flexcop_set_tone(struct dvb_frontend *fe, fe_sec_tone_mode_t tone) +static int flexcop_set_tone(struct dvb_frontend *fe, enum fe_sec_tone_mode tone) { /* u16 wz_half_period_for_45_mhz[] = { 0x01ff, 0x0154, 0x00ff, 0x00cc }; */ struct flexcop_device *fc = fe->dvb->priv; @@ -157,7 +158,7 @@ static int flexcop_diseqc_send_master_cmd(struct dvb_frontend *fe, } static int flexcop_diseqc_send_burst(struct dvb_frontend *fe, - fe_sec_mini_cmd_t minicmd) + enum fe_sec_mini_cmd minicmd) { return flexcop_send_diseqc_msg(fe, 0, NULL, minicmd); } diff --git a/drivers/media/common/siano/smsdvb-main.c b/drivers/media/common/siano/smsdvb-main.c index 367b8e77feb8..f4305ae800f4 100644 --- a/drivers/media/common/siano/smsdvb-main.c +++ b/drivers/media/common/siano/smsdvb-main.c @@ -753,7 +753,7 @@ static inline int led_feedback(struct smsdvb_client_t *client) SMS_LED_HI : SMS_LED_LO); } -static int smsdvb_read_status(struct dvb_frontend *fe, fe_status_t *stat) +static int smsdvb_read_status(struct dvb_frontend *fe, enum fe_status *stat) { int rc; struct smsdvb_client_t *client; @@ -900,7 +900,7 @@ static int smsdvb_dvbt_set_frontend(struct dvb_frontend *fe) /* Disable LNA, if any. An error is returned if no LNA is present */ ret = sms_board_lna_control(client->coredev, 0); if (ret == 0) { - fe_status_t status; + enum fe_status status; /* tune with LNA off at first */ ret = smsdvb_sendrequest_and_wait(client, &msg, sizeof(msg), @@ -971,7 +971,7 @@ static int smsdvb_isdbt_set_frontend(struct dvb_frontend *fe) /* Disable LNA, if any. 
An error is returned if no LNA is present */ ret = sms_board_lna_control(client->coredev, 0); if (ret == 0) { - fe_status_t status; + enum fe_status status; /* tune with LNA off at first */ ret = smsdvb_sendrequest_and_wait(client, &msg, sizeof(msg), diff --git a/drivers/media/common/siano/smsdvb.h b/drivers/media/common/siano/smsdvb.h index ae36d0ae0fb1..b15754d95ec0 100644 --- a/drivers/media/common/siano/smsdvb.h +++ b/drivers/media/common/siano/smsdvb.h @@ -40,7 +40,7 @@ struct smsdvb_client_t { struct dmxdev dmxdev; struct dvb_frontend frontend; - fe_status_t fe_status; + enum fe_status fe_status; struct completion tune_done; struct completion stats_done; diff --git a/drivers/media/dvb-core/dvb_frontend.c b/drivers/media/dvb-core/dvb_frontend.c index a894d4c99ee8..55a6b0500615 100644 --- a/drivers/media/dvb-core/dvb_frontend.c +++ b/drivers/media/dvb-core/dvb_frontend.c @@ -110,7 +110,7 @@ struct dvb_frontend_private { struct task_struct *thread; unsigned long release_jiffies; unsigned int wakeup; - fe_status_t status; + enum fe_status status; unsigned long tune_mode_flags; unsigned int delay; unsigned int reinitialise; @@ -198,7 +198,8 @@ static enum dvbv3_emulation_type dvbv3_type(u32 delivery_system) } } -static void dvb_frontend_add_event(struct dvb_frontend *fe, fe_status_t status) +static void dvb_frontend_add_event(struct dvb_frontend *fe, + enum fe_status status) { struct dvb_frontend_private *fepriv = fe->frontend_priv; struct dvb_fe_events *events = &fepriv->events; @@ -429,7 +430,7 @@ static int dvb_frontend_swzigzag_autotune(struct dvb_frontend *fe, int check_wra static void dvb_frontend_swzigzag(struct dvb_frontend *fe) { - fe_status_t s = 0; + enum fe_status s = 0; int retval = 0; struct dvb_frontend_private *fepriv = fe->frontend_priv; struct dtv_frontend_properties *c = &fe->dtv_property_cache, tmp; @@ -690,7 +691,7 @@ static int dvb_frontend_thread(void *data) { struct dvb_frontend *fe = data; struct dvb_frontend_private *fepriv = fe->frontend_priv; - fe_status_t s; + enum fe_status s; enum dvbfe_algo algo; #ifdef CONFIG_MEDIA_CONTROLLER_DVB int ret; @@ -2341,7 +2342,7 @@ static int dvb_frontend_ioctl_legacy(struct file *file, } case FE_READ_STATUS: { - fe_status_t* status = parg; + enum fe_status *status = parg; /* if retune was requested but hasn't occurred yet, prevent * that user get signal state from previous tuning */ @@ -2411,7 +2412,8 @@ static int dvb_frontend_ioctl_legacy(struct file *file, case FE_DISEQC_SEND_BURST: if (fe->ops.diseqc_send_burst) { - err = fe->ops.diseqc_send_burst(fe, (fe_sec_mini_cmd_t) parg); + err = fe->ops.diseqc_send_burst(fe, + (enum fe_sec_mini_cmd)parg); fepriv->state = FESTATE_DISEQC; fepriv->status = 0; } @@ -2419,8 +2421,9 @@ static int dvb_frontend_ioctl_legacy(struct file *file, case FE_SET_TONE: if (fe->ops.set_tone) { - err = fe->ops.set_tone(fe, (fe_sec_tone_mode_t) parg); - fepriv->tone = (fe_sec_tone_mode_t) parg; + err = fe->ops.set_tone(fe, + (enum fe_sec_tone_mode)parg); + fepriv->tone = (enum fe_sec_tone_mode)parg; fepriv->state = FESTATE_DISEQC; fepriv->status = 0; } @@ -2428,8 +2431,9 @@ static int dvb_frontend_ioctl_legacy(struct file *file, case FE_SET_VOLTAGE: if (fe->ops.set_voltage) { - err = fe->ops.set_voltage(fe, (fe_sec_voltage_t) parg); - fepriv->voltage = (fe_sec_voltage_t) parg; + err = fe->ops.set_voltage(fe, + (enum fe_sec_voltage)parg); + fepriv->voltage = (enum fe_sec_voltage)parg; fepriv->state = FESTATE_DISEQC; fepriv->status = 0; } @@ -2437,7 +2441,8 @@ static int dvb_frontend_ioctl_legacy(struct 
file *file, case FE_DISHNETWORK_SEND_LEGACY_CMD: if (fe->ops.dishnetwork_send_legacy_command) { - err = fe->ops.dishnetwork_send_legacy_command(fe, (unsigned long) parg); + err = fe->ops.dishnetwork_send_legacy_command(fe, + (unsigned long)parg); fepriv->state = FESTATE_DISEQC; fepriv->status = 0; } else if (fe->ops.set_voltage) { diff --git a/drivers/media/dvb-core/dvb_frontend.h b/drivers/media/dvb-core/dvb_frontend.h index 816269e5f706..4ff82041fdfd 100644 --- a/drivers/media/dvb-core/dvb_frontend.h +++ b/drivers/media/dvb-core/dvb_frontend.h @@ -279,7 +279,7 @@ struct dvb_frontend_ops { bool re_tune, unsigned int mode_flags, unsigned int *delay, - fe_status_t *status); + enum fe_status *status); /* get frontend tuning algorithm from the module */ enum dvbfe_algo (*get_frontend_algo)(struct dvb_frontend *fe); @@ -289,7 +289,7 @@ struct dvb_frontend_ops { int (*get_frontend)(struct dvb_frontend *fe); - int (*read_status)(struct dvb_frontend* fe, fe_status_t* status); + int (*read_status)(struct dvb_frontend *fe, enum fe_status *status); int (*read_ber)(struct dvb_frontend* fe, u32* ber); int (*read_signal_strength)(struct dvb_frontend* fe, u16* strength); int (*read_snr)(struct dvb_frontend* fe, u16* snr); @@ -298,9 +298,11 @@ struct dvb_frontend_ops { int (*diseqc_reset_overload)(struct dvb_frontend* fe); int (*diseqc_send_master_cmd)(struct dvb_frontend* fe, struct dvb_diseqc_master_cmd* cmd); int (*diseqc_recv_slave_reply)(struct dvb_frontend* fe, struct dvb_diseqc_slave_reply* reply); - int (*diseqc_send_burst)(struct dvb_frontend* fe, fe_sec_mini_cmd_t minicmd); - int (*set_tone)(struct dvb_frontend* fe, fe_sec_tone_mode_t tone); - int (*set_voltage)(struct dvb_frontend* fe, fe_sec_voltage_t voltage); + int (*diseqc_send_burst)(struct dvb_frontend *fe, + enum fe_sec_mini_cmd minicmd); + int (*set_tone)(struct dvb_frontend *fe, enum fe_sec_tone_mode tone); + int (*set_voltage)(struct dvb_frontend *fe, + enum fe_sec_voltage voltage); int (*enable_high_lnb_voltage)(struct dvb_frontend* fe, long arg); int (*dishnetwork_send_legacy_command)(struct dvb_frontend* fe, unsigned long cmd); int (*i2c_gate_ctrl)(struct dvb_frontend* fe, int enable); @@ -338,24 +340,24 @@ struct dtv_frontend_properties { u32 state; u32 frequency; - fe_modulation_t modulation; + enum fe_modulation modulation; - fe_sec_voltage_t voltage; - fe_sec_tone_mode_t sectone; - fe_spectral_inversion_t inversion; - fe_code_rate_t fec_inner; - fe_transmit_mode_t transmission_mode; + enum fe_sec_voltage voltage; + enum fe_sec_tone_mode sectone; + enum fe_spectral_inversion inversion; + enum fe_code_rate fec_inner; + enum fe_transmit_mode transmission_mode; u32 bandwidth_hz; /* 0 = AUTO */ - fe_guard_interval_t guard_interval; - fe_hierarchy_t hierarchy; + enum fe_guard_interval guard_interval; + enum fe_hierarchy hierarchy; u32 symbol_rate; - fe_code_rate_t code_rate_HP; - fe_code_rate_t code_rate_LP; + enum fe_code_rate code_rate_HP; + enum fe_code_rate code_rate_LP; - fe_pilot_t pilot; - fe_rolloff_t rolloff; + enum fe_pilot pilot; + enum fe_rolloff rolloff; - fe_delivery_system_t delivery_system; + enum fe_delivery_system delivery_system; enum fe_interleaving interleaving; @@ -368,8 +370,8 @@ struct dtv_frontend_properties { u8 isdbt_layer_enabled; struct { u8 segment_count; - fe_code_rate_t fec; - fe_modulation_t modulation; + enum fe_code_rate fec; + enum fe_modulation modulation; u8 interleaving; } layer[3]; diff --git a/drivers/media/dvb-frontends/a8293.c b/drivers/media/dvb-frontends/a8293.c index 
3f0cf9ee6672..97ecbe01034c 100644 --- a/drivers/media/dvb-frontends/a8293.c +++ b/drivers/media/dvb-frontends/a8293.c @@ -67,7 +67,7 @@ static int a8293_rd(struct a8293_priv *priv, u8 *val, int len) } static int a8293_set_voltage(struct dvb_frontend *fe, - fe_sec_voltage_t fe_sec_voltage) + enum fe_sec_voltage fe_sec_voltage) { struct a8293_priv *priv = fe->sec_priv; int ret; diff --git a/drivers/media/dvb-frontends/af9013.c b/drivers/media/dvb-frontends/af9013.c index ba6c8f6c42a1..e23197da84af 100644 --- a/drivers/media/dvb-frontends/af9013.c +++ b/drivers/media/dvb-frontends/af9013.c @@ -39,7 +39,7 @@ struct af9013_state { u32 ucblocks; u16 snr; u32 bandwidth_hz; - fe_status_t fe_status; + enum fe_status fe_status; unsigned long set_frontend_jiffies; unsigned long read_status_jiffies; bool first_tune; @@ -983,7 +983,7 @@ err: return ret; } -static int af9013_read_status(struct dvb_frontend *fe, fe_status_t *status) +static int af9013_read_status(struct dvb_frontend *fe, enum fe_status *status) { struct af9013_state *state = fe->demodulator_priv; int ret; diff --git a/drivers/media/dvb-frontends/af9033.c b/drivers/media/dvb-frontends/af9033.c index 82ce47bdf5dc..59018afaa95f 100644 --- a/drivers/media/dvb-frontends/af9033.c +++ b/drivers/media/dvb-frontends/af9033.c @@ -35,7 +35,7 @@ struct af9033_dev { bool ts_mode_parallel; bool ts_mode_serial; - fe_status_t fe_status; + enum fe_status fe_status; u64 post_bit_error_prev; /* for old read_ber we return (curr - prev) */ u64 post_bit_error; u64 post_bit_count; @@ -818,7 +818,7 @@ err: return ret; } -static int af9033_read_status(struct dvb_frontend *fe, fe_status_t *status) +static int af9033_read_status(struct dvb_frontend *fe, enum fe_status *status) { struct af9033_dev *dev = fe->demodulator_priv; int ret; diff --git a/drivers/media/dvb-frontends/as102_fe.c b/drivers/media/dvb-frontends/as102_fe.c index 493665899565..544c5f65d19a 100644 --- a/drivers/media/dvb-frontends/as102_fe.c +++ b/drivers/media/dvb-frontends/as102_fe.c @@ -32,7 +32,7 @@ struct as102_state { uint32_t ber; }; -static uint8_t as102_fe_get_code_rate(fe_code_rate_t arg) +static uint8_t as102_fe_get_code_rate(enum fe_code_rate arg) { uint8_t c; @@ -306,7 +306,7 @@ static int as102_fe_get_tune_settings(struct dvb_frontend *fe, return 0; } -static int as102_fe_read_status(struct dvb_frontend *fe, fe_status_t *status) +static int as102_fe_read_status(struct dvb_frontend *fe, enum fe_status *status) { int ret = 0; struct as102_state *state = fe->demodulator_priv; diff --git a/drivers/media/dvb-frontends/atbm8830.c b/drivers/media/dvb-frontends/atbm8830.c index 4e11dc4b1335..8fe552e293ed 100644 --- a/drivers/media/dvb-frontends/atbm8830.c +++ b/drivers/media/dvb-frontends/atbm8830.c @@ -335,7 +335,8 @@ static int atbm8830_get_tune_settings(struct dvb_frontend *fe, return 0; } -static int atbm8830_read_status(struct dvb_frontend *fe, fe_status_t *fe_status) +static int atbm8830_read_status(struct dvb_frontend *fe, + enum fe_status *fe_status) { struct atbm_state *priv = fe->demodulator_priv; u8 locked = 0; diff --git a/drivers/media/dvb-frontends/au8522_dig.c b/drivers/media/dvb-frontends/au8522_dig.c index 5d06c99b0e97..b744a3f8d467 100644 --- a/drivers/media/dvb-frontends/au8522_dig.c +++ b/drivers/media/dvb-frontends/au8522_dig.c @@ -552,7 +552,7 @@ static struct { }; static int au8522_enable_modulation(struct dvb_frontend *fe, - fe_modulation_t m) + enum fe_modulation m) { struct au8522_state *state = fe->demodulator_priv; int i; @@ -644,7 +644,7 @@ static int 
au8522_set_frontend(struct dvb_frontend *fe) return 0; } -static int au8522_read_status(struct dvb_frontend *fe, fe_status_t *status) +static int au8522_read_status(struct dvb_frontend *fe, enum fe_status *status) { struct au8522_state *state = fe->demodulator_priv; u8 reg; diff --git a/drivers/media/dvb-frontends/au8522_priv.h b/drivers/media/dvb-frontends/au8522_priv.h index b8aca1c84786..951b3847e6f6 100644 --- a/drivers/media/dvb-frontends/au8522_priv.h +++ b/drivers/media/dvb-frontends/au8522_priv.h @@ -55,7 +55,7 @@ struct au8522_state { struct dvb_frontend frontend; u32 current_frequency; - fe_modulation_t current_modulation; + enum fe_modulation current_modulation; u32 fe_status; unsigned int led_state; diff --git a/drivers/media/dvb-frontends/bcm3510.c b/drivers/media/dvb-frontends/bcm3510.c index 23bfd00d42db..d30275f27644 100644 --- a/drivers/media/dvb-frontends/bcm3510.c +++ b/drivers/media/dvb-frontends/bcm3510.c @@ -289,7 +289,7 @@ static int bcm3510_refresh_state(struct bcm3510_state *st) return 0; } -static int bcm3510_read_status(struct dvb_frontend *fe, fe_status_t *status) +static int bcm3510_read_status(struct dvb_frontend *fe, enum fe_status *status) { struct bcm3510_state* st = fe->demodulator_priv; bcm3510_refresh_state(st); diff --git a/drivers/media/dvb-frontends/cx22700.c b/drivers/media/dvb-frontends/cx22700.c index 86563260d0f2..fd033cca6e11 100644 --- a/drivers/media/dvb-frontends/cx22700.c +++ b/drivers/media/dvb-frontends/cx22700.c @@ -191,9 +191,10 @@ static int cx22700_set_tps(struct cx22700_state *state, static int cx22700_get_tps(struct cx22700_state *state, struct dtv_frontend_properties *p) { - static const fe_modulation_t qam_tab [3] = { QPSK, QAM_16, QAM_64 }; - static const fe_code_rate_t fec_tab [5] = { FEC_1_2, FEC_2_3, FEC_3_4, - FEC_5_6, FEC_7_8 }; + static const enum fe_modulation qam_tab[3] = { QPSK, QAM_16, QAM_64 }; + static const enum fe_code_rate fec_tab[5] = { + FEC_1_2, FEC_2_3, FEC_3_4, FEC_5_6, FEC_7_8 + }; u8 val; dprintk ("%s\n", __func__); @@ -253,7 +254,7 @@ static int cx22700_init (struct dvb_frontend* fe) return 0; } -static int cx22700_read_status(struct dvb_frontend* fe, fe_status_t* status) +static int cx22700_read_status(struct dvb_frontend *fe, enum fe_status *status) { struct cx22700_state* state = fe->demodulator_priv; diff --git a/drivers/media/dvb-frontends/cx22702.c b/drivers/media/dvb-frontends/cx22702.c index edc8eafc5c09..d2d06dcd7683 100644 --- a/drivers/media/dvb-frontends/cx22702.c +++ b/drivers/media/dvb-frontends/cx22702.c @@ -452,7 +452,7 @@ static int cx22702_init(struct dvb_frontend *fe) return 0; } -static int cx22702_read_status(struct dvb_frontend *fe, fe_status_t *status) +static int cx22702_read_status(struct dvb_frontend *fe, enum fe_status *status) { struct cx22702_state *state = fe->demodulator_priv; u8 reg0A; diff --git a/drivers/media/dvb-frontends/cx24110.c b/drivers/media/dvb-frontends/cx24110.c index 7b510f2ae20f..cb36475e322b 100644 --- a/drivers/media/dvb-frontends/cx24110.c +++ b/drivers/media/dvb-frontends/cx24110.c @@ -143,7 +143,8 @@ static int cx24110_readreg (struct cx24110_state* state, u8 reg) return b1[0]; } -static int cx24110_set_inversion (struct cx24110_state* state, fe_spectral_inversion_t inversion) +static int cx24110_set_inversion(struct cx24110_state *state, + enum fe_spectral_inversion inversion) { /* fixme (low): error handling */ @@ -177,7 +178,7 @@ static int cx24110_set_inversion (struct cx24110_state* state, fe_spectral_inver return 0; } -static int cx24110_set_fec(struct 
cx24110_state* state, fe_code_rate_t fec) +static int cx24110_set_fec(struct cx24110_state *state, enum fe_code_rate fec) { static const int rate[FEC_AUTO] = {-1, 1, 2, 3, 5, 7, -1}; static const int g1[FEC_AUTO] = {-1, 0x01, 0x02, 0x05, 0x15, 0x45, -1}; @@ -220,7 +221,7 @@ static int cx24110_set_fec(struct cx24110_state* state, fe_code_rate_t fec) return 0; } -static fe_code_rate_t cx24110_get_fec (struct cx24110_state* state) +static enum fe_code_rate cx24110_get_fec(struct cx24110_state *state) { int i; @@ -365,7 +366,8 @@ static int cx24110_initfe(struct dvb_frontend* fe) return 0; } -static int cx24110_set_voltage (struct dvb_frontend* fe, fe_sec_voltage_t voltage) +static int cx24110_set_voltage(struct dvb_frontend *fe, + enum fe_sec_voltage voltage) { struct cx24110_state *state = fe->demodulator_priv; @@ -379,7 +381,8 @@ static int cx24110_set_voltage (struct dvb_frontend* fe, fe_sec_voltage_t voltag } } -static int cx24110_diseqc_send_burst(struct dvb_frontend* fe, fe_sec_mini_cmd_t burst) +static int cx24110_diseqc_send_burst(struct dvb_frontend *fe, + enum fe_sec_mini_cmd burst) { int rv, bit; struct cx24110_state *state = fe->demodulator_priv; @@ -434,7 +437,8 @@ static int cx24110_send_diseqc_msg(struct dvb_frontend* fe, return 0; } -static int cx24110_read_status(struct dvb_frontend* fe, fe_status_t* status) +static int cx24110_read_status(struct dvb_frontend *fe, + enum fe_status *status) { struct cx24110_state *state = fe->demodulator_priv; @@ -574,7 +578,8 @@ static int cx24110_get_frontend(struct dvb_frontend *fe) return 0; } -static int cx24110_set_tone(struct dvb_frontend* fe, fe_sec_tone_mode_t tone) +static int cx24110_set_tone(struct dvb_frontend *fe, + enum fe_sec_tone_mode tone) { struct cx24110_state *state = fe->demodulator_priv; diff --git a/drivers/media/dvb-frontends/cx24116.c b/drivers/media/dvb-frontends/cx24116.c index 7bc68b355c0b..8814f36d53fb 100644 --- a/drivers/media/dvb-frontends/cx24116.c +++ b/drivers/media/dvb-frontends/cx24116.c @@ -160,13 +160,13 @@ enum cmds { struct cx24116_tuning { u32 frequency; u32 symbol_rate; - fe_spectral_inversion_t inversion; - fe_code_rate_t fec; + enum fe_spectral_inversion inversion; + enum fe_code_rate fec; - fe_delivery_system_t delsys; - fe_modulation_t modulation; - fe_pilot_t pilot; - fe_rolloff_t rolloff; + enum fe_delivery_system delsys; + enum fe_modulation modulation; + enum fe_pilot pilot; + enum fe_rolloff rolloff; /* Demod values */ u8 fec_val; @@ -285,7 +285,7 @@ static int cx24116_readreg(struct cx24116_state *state, u8 reg) } static int cx24116_set_inversion(struct cx24116_state *state, - fe_spectral_inversion_t inversion) + enum fe_spectral_inversion inversion) { dprintk("%s(%d)\n", __func__, inversion); @@ -373,9 +373,9 @@ static int cx24116_set_inversion(struct cx24116_state *state, * a scheme are support. Especially, no auto detect when in S2 mode. 
*/ static struct cx24116_modfec { - fe_delivery_system_t delivery_system; - fe_modulation_t modulation; - fe_code_rate_t fec; + enum fe_delivery_system delivery_system; + enum fe_modulation modulation; + enum fe_code_rate fec; u8 mask; /* In DVBS mode this is used to autodetect */ u8 val; /* Passed to the firmware to indicate mode selection */ } CX24116_MODFEC_MODES[] = { @@ -415,7 +415,7 @@ static struct cx24116_modfec { }; static int cx24116_lookup_fecmod(struct cx24116_state *state, - fe_delivery_system_t d, fe_modulation_t m, fe_code_rate_t f) + enum fe_delivery_system d, enum fe_modulation m, enum fe_code_rate f) { int i, ret = -EOPNOTSUPP; @@ -434,7 +434,9 @@ static int cx24116_lookup_fecmod(struct cx24116_state *state, } static int cx24116_set_fec(struct cx24116_state *state, - fe_delivery_system_t delsys, fe_modulation_t mod, fe_code_rate_t fec) + enum fe_delivery_system delsys, + enum fe_modulation mod, + enum fe_code_rate fec) { int ret = 0; @@ -683,7 +685,7 @@ static int cx24116_load_firmware(struct dvb_frontend *fe, return 0; } -static int cx24116_read_status(struct dvb_frontend *fe, fe_status_t *status) +static int cx24116_read_status(struct dvb_frontend *fe, enum fe_status *status) { struct cx24116_state *state = fe->demodulator_priv; @@ -844,7 +846,7 @@ static int cx24116_wait_for_lnb(struct dvb_frontend *fe) } static int cx24116_set_voltage(struct dvb_frontend *fe, - fe_sec_voltage_t voltage) + enum fe_sec_voltage voltage) { struct cx24116_cmd cmd; int ret; @@ -872,7 +874,7 @@ static int cx24116_set_voltage(struct dvb_frontend *fe, } static int cx24116_set_tone(struct dvb_frontend *fe, - fe_sec_tone_mode_t tone) + enum fe_sec_tone_mode tone) { struct cx24116_cmd cmd; int ret; @@ -1055,7 +1057,7 @@ static int cx24116_send_diseqc_msg(struct dvb_frontend *fe, /* Send DiSEqC burst */ static int cx24116_diseqc_send_burst(struct dvb_frontend *fe, - fe_sec_mini_cmd_t burst) + enum fe_sec_mini_cmd burst) { struct cx24116_state *state = fe->demodulator_priv; int ret; @@ -1220,7 +1222,7 @@ static int cx24116_set_frontend(struct dvb_frontend *fe) struct cx24116_state *state = fe->demodulator_priv; struct dtv_frontend_properties *c = &fe->dtv_property_cache; struct cx24116_cmd cmd; - fe_status_t tunerstat; + enum fe_status tunerstat; int i, status, ret, retune = 1; dprintk("%s()\n", __func__); @@ -1441,7 +1443,7 @@ tuned: /* Set/Reset B/W */ } static int cx24116_tune(struct dvb_frontend *fe, bool re_tune, - unsigned int mode_flags, unsigned int *delay, fe_status_t *status) + unsigned int mode_flags, unsigned int *delay, enum fe_status *status) { /* * It is safe to discard "params" here, as the DVB core will sync diff --git a/drivers/media/dvb-frontends/cx24117.c b/drivers/media/dvb-frontends/cx24117.c index af6363573efd..5f77bc80a896 100644 --- a/drivers/media/dvb-frontends/cx24117.c +++ b/drivers/media/dvb-frontends/cx24117.c @@ -171,13 +171,13 @@ static DEFINE_MUTEX(cx24117_list_mutex); struct cx24117_tuning { u32 frequency; u32 symbol_rate; - fe_spectral_inversion_t inversion; - fe_code_rate_t fec; + enum fe_spectral_inversion inversion; + enum fe_code_rate fec; - fe_delivery_system_t delsys; - fe_modulation_t modulation; - fe_pilot_t pilot; - fe_rolloff_t rolloff; + enum fe_delivery_system delsys; + enum fe_modulation modulation; + enum fe_pilot pilot; + enum fe_rolloff rolloff; /* Demod values */ u8 fec_val; @@ -220,9 +220,9 @@ struct cx24117_state { /* modfec (modulation and FEC) lookup table */ /* Check cx24116.c for a detailed description of each field */ static struct 
cx24117_modfec { - fe_delivery_system_t delivery_system; - fe_modulation_t modulation; - fe_code_rate_t fec; + enum fe_delivery_system delivery_system; + enum fe_modulation modulation; + enum fe_code_rate fec; u8 mask; /* In DVBS mode this is used to autodetect */ u8 val; /* Passed to the firmware to indicate mode selection */ } cx24117_modfec_modes[] = { @@ -362,7 +362,7 @@ static int cx24117_readregN(struct cx24117_state *state, } static int cx24117_set_inversion(struct cx24117_state *state, - fe_spectral_inversion_t inversion) + enum fe_spectral_inversion inversion) { dev_dbg(&state->priv->i2c->dev, "%s(%d) demod%d\n", __func__, inversion, state->demod); @@ -387,7 +387,7 @@ static int cx24117_set_inversion(struct cx24117_state *state, } static int cx24117_lookup_fecmod(struct cx24117_state *state, - fe_delivery_system_t d, fe_modulation_t m, fe_code_rate_t f) + enum fe_delivery_system d, enum fe_modulation m, enum fe_code_rate f) { int i, ret = -EINVAL; @@ -408,7 +408,9 @@ static int cx24117_lookup_fecmod(struct cx24117_state *state, } static int cx24117_set_fec(struct cx24117_state *state, - fe_delivery_system_t delsys, fe_modulation_t mod, fe_code_rate_t fec) + enum fe_delivery_system delsys, + enum fe_modulation mod, + enum fe_code_rate fec) { int ret; @@ -737,7 +739,7 @@ error: return ret; } -static int cx24117_read_status(struct dvb_frontend *fe, fe_status_t *status) +static int cx24117_read_status(struct dvb_frontend *fe, enum fe_status *status) { struct cx24117_state *state = fe->demodulator_priv; int lock; @@ -843,7 +845,7 @@ static int cx24117_read_snr(struct dvb_frontend *fe, u16 *snr) static int cx24117_read_ucblocks(struct dvb_frontend *fe, u32 *ucblocks) { struct cx24117_state *state = fe->demodulator_priv; - fe_delivery_system_t delsys = fe->dtv_property_cache.delivery_system; + enum fe_delivery_system delsys = fe->dtv_property_cache.delivery_system; int ret; u8 buf[2]; u8 reg = (state->demod == 0) ? 
@@ -904,7 +906,7 @@ static int cx24117_wait_for_lnb(struct dvb_frontend *fe) } static int cx24117_set_voltage(struct dvb_frontend *fe, - fe_sec_voltage_t voltage) + enum fe_sec_voltage voltage) { struct cx24117_state *state = fe->demodulator_priv; struct cx24117_cmd cmd; @@ -956,7 +958,7 @@ static int cx24117_set_voltage(struct dvb_frontend *fe, } static int cx24117_set_tone(struct dvb_frontend *fe, - fe_sec_tone_mode_t tone) + enum fe_sec_tone_mode tone) { struct cx24117_state *state = fe->demodulator_priv; struct cx24117_cmd cmd; @@ -1112,7 +1114,7 @@ static int cx24117_send_diseqc_msg(struct dvb_frontend *fe, /* Send DiSEqC burst */ static int cx24117_diseqc_send_burst(struct dvb_frontend *fe, - fe_sec_mini_cmd_t burst) + enum fe_sec_mini_cmd burst) { struct cx24117_state *state = fe->demodulator_priv; @@ -1306,7 +1308,7 @@ static int cx24117_set_frontend(struct dvb_frontend *fe) struct cx24117_state *state = fe->demodulator_priv; struct dtv_frontend_properties *c = &fe->dtv_property_cache; struct cx24117_cmd cmd; - fe_status_t tunerstat; + enum fe_status tunerstat; int i, status, ret, retune = 1; u8 reg_clkdiv, reg_ratediv; @@ -1537,7 +1539,7 @@ static int cx24117_set_frontend(struct dvb_frontend *fe) } static int cx24117_tune(struct dvb_frontend *fe, bool re_tune, - unsigned int mode_flags, unsigned int *delay, fe_status_t *status) + unsigned int mode_flags, unsigned int *delay, enum fe_status *status) { struct cx24117_state *state = fe->demodulator_priv; diff --git a/drivers/media/dvb-frontends/cx24120.c b/drivers/media/dvb-frontends/cx24120.c index a1d1b1c44b40..3b0ef52bb834 100644 --- a/drivers/media/dvb-frontends/cx24120.c +++ b/drivers/media/dvb-frontends/cx24120.c @@ -118,12 +118,12 @@ enum command_message_id { struct cx24120_tuning { u32 frequency; u32 symbol_rate; - fe_spectral_inversion_t inversion; - fe_code_rate_t fec; + enum fe_spectral_inversion inversion; + enum fe_code_rate fec; - fe_delivery_system_t delsys; - fe_modulation_t modulation; - fe_pilot_t pilot; + enum fe_delivery_system delsys; + enum fe_modulation modulation; + enum fe_pilot pilot; /* Demod values */ u8 fec_val; @@ -148,7 +148,7 @@ struct cx24120_state { struct cx24120_tuning dcur; struct cx24120_tuning dnxt; - fe_status_t fe_status; + enum fe_status fe_status; /* dvbv5 stats calculations */ u32 bitrate; @@ -491,7 +491,7 @@ static int cx24120_msg_mpeg_output_config(struct cx24120_state *state, u8 seq) } static int cx24120_diseqc_send_burst(struct dvb_frontend *fe, - fe_sec_mini_cmd_t burst) + enum fe_sec_mini_cmd burst) { struct cx24120_state *state = fe->demodulator_priv; struct cx24120_cmd cmd; @@ -513,7 +513,7 @@ static int cx24120_diseqc_send_burst(struct dvb_frontend *fe, return cx24120_message_send(state, &cmd); } -static int cx24120_set_tone(struct dvb_frontend *fe, fe_sec_tone_mode_t tone) +static int cx24120_set_tone(struct dvb_frontend *fe, enum fe_sec_tone_mode tone) { struct cx24120_state *state = fe->demodulator_priv; struct cx24120_cmd cmd; @@ -536,7 +536,7 @@ static int cx24120_set_tone(struct dvb_frontend *fe, fe_sec_tone_mode_t tone) } static int cx24120_set_voltage(struct dvb_frontend *fe, - fe_sec_voltage_t voltage) + enum fe_sec_voltage voltage) { struct cx24120_state *state = fe->demodulator_priv; struct cx24120_cmd cmd; @@ -713,7 +713,7 @@ static void cx24120_get_stats(struct cx24120_state *state) static void cx24120_set_clock_ratios(struct dvb_frontend *fe); /* Read current tuning status */ -static int cx24120_read_status(struct dvb_frontend *fe, fe_status_t *status) +static int 
cx24120_read_status(struct dvb_frontend *fe, enum fe_status *status) { struct cx24120_state *state = fe->demodulator_priv; int lock; @@ -765,9 +765,9 @@ static int cx24120_read_status(struct dvb_frontend *fe, fe_status_t *status) * once tuned in. */ struct cx24120_modfec { - fe_delivery_system_t delsys; - fe_modulation_t mod; - fe_code_rate_t fec; + enum fe_delivery_system delsys; + enum fe_modulation mod; + enum fe_code_rate fec; u8 val; }; @@ -871,10 +871,10 @@ static void cx24120_calculate_ber_window(struct cx24120_state *state, u32 rate) * can't determine the pattern */ struct cx24120_clock_ratios_table { - fe_delivery_system_t delsys; - fe_pilot_t pilot; - fe_modulation_t mod; - fe_code_rate_t fec; + enum fe_delivery_system delsys; + enum fe_pilot pilot; + enum fe_modulation mod; + enum fe_code_rate fec; u32 m_rat; u32 n_rat; u32 rate; @@ -988,7 +988,7 @@ static void cx24120_set_clock_ratios(struct dvb_frontend *fe) /* Set inversion value */ static int cx24120_set_inversion(struct cx24120_state *state, - fe_spectral_inversion_t inversion) + enum fe_spectral_inversion inversion) { dev_dbg(&state->i2c->dev, "(%d)\n", inversion); @@ -1013,9 +1013,9 @@ static int cx24120_set_inversion(struct cx24120_state *state, /* FEC lookup table for tuning */ struct cx24120_modfec_table { - fe_delivery_system_t delsys; - fe_modulation_t mod; - fe_code_rate_t fec; + enum fe_delivery_system delsys; + enum fe_modulation mod; + enum fe_code_rate fec; u8 val; }; @@ -1046,8 +1046,8 @@ static const struct cx24120_modfec_table modfec_table[] = { }; /* Set fec_val & fec_mask values from delsys, modulation & fec */ -static int cx24120_set_fec(struct cx24120_state *state, fe_modulation_t mod, - fe_code_rate_t fec) +static int cx24120_set_fec(struct cx24120_state *state, enum fe_modulation mod, + enum fe_code_rate fec) { int idx; @@ -1084,7 +1084,7 @@ static int cx24120_set_fec(struct cx24120_state *state, fe_modulation_t mod, } /* Set pilot */ -static int cx24120_set_pilot(struct cx24120_state *state, fe_pilot_t pilot) +static int cx24120_set_pilot(struct cx24120_state *state, enum fe_pilot pilot) { dev_dbg(&state->i2c->dev, "(%d)\n", pilot); @@ -1474,7 +1474,7 @@ static int cx24120_init(struct dvb_frontend *fe) static int cx24120_tune(struct dvb_frontend *fe, bool re_tune, unsigned int mode_flags, unsigned int *delay, - fe_status_t *status) + enum fe_status *status) { struct cx24120_state *state = fe->demodulator_priv; int ret; diff --git a/drivers/media/dvb-frontends/cx24123.c b/drivers/media/dvb-frontends/cx24123.c index 7975c6608e20..e18cf9e1185e 100644 --- a/drivers/media/dvb-frontends/cx24123.c +++ b/drivers/media/dvb-frontends/cx24123.c @@ -290,7 +290,7 @@ static int cx24123_i2c_readreg(struct cx24123_state *state, u8 i2c_addr, u8 reg) cx24123_i2c_writereg(state, state->config->demod_address, reg, val) static int cx24123_set_inversion(struct cx24123_state *state, - fe_spectral_inversion_t inversion) + enum fe_spectral_inversion inversion) { u8 nom_reg = cx24123_readreg(state, 0x0e); u8 auto_reg = cx24123_readreg(state, 0x10); @@ -318,7 +318,7 @@ static int cx24123_set_inversion(struct cx24123_state *state, } static int cx24123_get_inversion(struct cx24123_state *state, - fe_spectral_inversion_t *inversion) + enum fe_spectral_inversion *inversion) { u8 val; @@ -335,7 +335,7 @@ static int cx24123_get_inversion(struct cx24123_state *state, return 0; } -static int cx24123_set_fec(struct cx24123_state *state, fe_code_rate_t fec) +static int cx24123_set_fec(struct cx24123_state *state, enum fe_code_rate fec) { 
u8 nom_reg = cx24123_readreg(state, 0x0e) & ~0x07; @@ -397,7 +397,7 @@ static int cx24123_set_fec(struct cx24123_state *state, fe_code_rate_t fec) return 0; } -static int cx24123_get_fec(struct cx24123_state *state, fe_code_rate_t *fec) +static int cx24123_get_fec(struct cx24123_state *state, enum fe_code_rate *fec) { int ret; @@ -720,7 +720,7 @@ static int cx24123_initfe(struct dvb_frontend *fe) } static int cx24123_set_voltage(struct dvb_frontend *fe, - fe_sec_voltage_t voltage) + enum fe_sec_voltage voltage) { struct cx24123_state *state = fe->demodulator_priv; u8 val; @@ -795,7 +795,7 @@ static int cx24123_send_diseqc_msg(struct dvb_frontend *fe, } static int cx24123_diseqc_send_burst(struct dvb_frontend *fe, - fe_sec_mini_cmd_t burst) + enum fe_sec_mini_cmd burst) { struct cx24123_state *state = fe->demodulator_priv; int val, tone; @@ -831,7 +831,7 @@ static int cx24123_diseqc_send_burst(struct dvb_frontend *fe, return 0; } -static int cx24123_read_status(struct dvb_frontend *fe, fe_status_t *status) +static int cx24123_read_status(struct dvb_frontend *fe, enum fe_status *status) { struct cx24123_state *state = fe->demodulator_priv; int sync = cx24123_readreg(state, 0x14); @@ -966,7 +966,7 @@ static int cx24123_get_frontend(struct dvb_frontend *fe) return 0; } -static int cx24123_set_tone(struct dvb_frontend *fe, fe_sec_tone_mode_t tone) +static int cx24123_set_tone(struct dvb_frontend *fe, enum fe_sec_tone_mode tone) { struct cx24123_state *state = fe->demodulator_priv; u8 val; @@ -995,7 +995,7 @@ static int cx24123_tune(struct dvb_frontend *fe, bool re_tune, unsigned int mode_flags, unsigned int *delay, - fe_status_t *status) + enum fe_status *status) { int retval = 0; diff --git a/drivers/media/dvb-frontends/cxd2820r_c.c b/drivers/media/dvb-frontends/cxd2820r_c.c index 72b0e2db3aab..42fad6aa3958 100644 --- a/drivers/media/dvb-frontends/cxd2820r_c.c +++ b/drivers/media/dvb-frontends/cxd2820r_c.c @@ -259,7 +259,7 @@ int cxd2820r_read_ucblocks_c(struct dvb_frontend *fe, u32 *ucblocks) return 0; } -int cxd2820r_read_status_c(struct dvb_frontend *fe, fe_status_t *status) +int cxd2820r_read_status_c(struct dvb_frontend *fe, enum fe_status *status) { struct cxd2820r_priv *priv = fe->demodulator_priv; int ret; diff --git a/drivers/media/dvb-frontends/cxd2820r_core.c b/drivers/media/dvb-frontends/cxd2820r_core.c index 490e090048ef..def6d21d1445 100644 --- a/drivers/media/dvb-frontends/cxd2820r_core.c +++ b/drivers/media/dvb-frontends/cxd2820r_core.c @@ -287,7 +287,8 @@ static int cxd2820r_set_frontend(struct dvb_frontend *fe) err: return ret; } -static int cxd2820r_read_status(struct dvb_frontend *fe, fe_status_t *status) + +static int cxd2820r_read_status(struct dvb_frontend *fe, enum fe_status *status) { struct cxd2820r_priv *priv = fe->demodulator_priv; int ret; @@ -501,7 +502,7 @@ static enum dvbfe_search cxd2820r_search(struct dvb_frontend *fe) struct cxd2820r_priv *priv = fe->demodulator_priv; struct dtv_frontend_properties *c = &fe->dtv_property_cache; int ret, i; - fe_status_t status = 0; + enum fe_status status = 0; dev_dbg(&priv->i2c->dev, "%s: delsys=%d\n", __func__, fe->dtv_property_cache.delivery_system); diff --git a/drivers/media/dvb-frontends/cxd2820r_priv.h b/drivers/media/dvb-frontends/cxd2820r_priv.h index 4b428959b16e..a0d53f01a8bf 100644 --- a/drivers/media/dvb-frontends/cxd2820r_priv.h +++ b/drivers/media/dvb-frontends/cxd2820r_priv.h @@ -48,7 +48,7 @@ struct cxd2820r_priv { struct gpio_chip gpio_chip; #endif - fe_delivery_system_t delivery_system; + enum 
fe_delivery_system delivery_system; bool last_tune_failed; /* for switch between T and T2 tune */ }; @@ -80,7 +80,7 @@ int cxd2820r_get_frontend_c(struct dvb_frontend *fe); int cxd2820r_set_frontend_c(struct dvb_frontend *fe); -int cxd2820r_read_status_c(struct dvb_frontend *fe, fe_status_t *status); +int cxd2820r_read_status_c(struct dvb_frontend *fe, enum fe_status *status); int cxd2820r_read_ber_c(struct dvb_frontend *fe, u32 *ber); @@ -103,7 +103,7 @@ int cxd2820r_get_frontend_t(struct dvb_frontend *fe); int cxd2820r_set_frontend_t(struct dvb_frontend *fe); -int cxd2820r_read_status_t(struct dvb_frontend *fe, fe_status_t *status); +int cxd2820r_read_status_t(struct dvb_frontend *fe, enum fe_status *status); int cxd2820r_read_ber_t(struct dvb_frontend *fe, u32 *ber); @@ -126,7 +126,7 @@ int cxd2820r_get_frontend_t2(struct dvb_frontend *fe); int cxd2820r_set_frontend_t2(struct dvb_frontend *fe); -int cxd2820r_read_status_t2(struct dvb_frontend *fe, fe_status_t *status); +int cxd2820r_read_status_t2(struct dvb_frontend *fe, enum fe_status *status); int cxd2820r_read_ber_t2(struct dvb_frontend *fe, u32 *ber); diff --git a/drivers/media/dvb-frontends/cxd2820r_t.c b/drivers/media/dvb-frontends/cxd2820r_t.c index 008cb2ac8480..21abf1b4ed4d 100644 --- a/drivers/media/dvb-frontends/cxd2820r_t.c +++ b/drivers/media/dvb-frontends/cxd2820r_t.c @@ -349,7 +349,7 @@ int cxd2820r_read_ucblocks_t(struct dvb_frontend *fe, u32 *ucblocks) return 0; } -int cxd2820r_read_status_t(struct dvb_frontend *fe, fe_status_t *status) +int cxd2820r_read_status_t(struct dvb_frontend *fe, enum fe_status *status) { struct cxd2820r_priv *priv = fe->demodulator_priv; int ret; diff --git a/drivers/media/dvb-frontends/cxd2820r_t2.c b/drivers/media/dvb-frontends/cxd2820r_t2.c index 35fe364c7182..4e028b41c0d5 100644 --- a/drivers/media/dvb-frontends/cxd2820r_t2.c +++ b/drivers/media/dvb-frontends/cxd2820r_t2.c @@ -284,7 +284,7 @@ error: return ret; } -int cxd2820r_read_status_t2(struct dvb_frontend *fe, fe_status_t *status) +int cxd2820r_read_status_t2(struct dvb_frontend *fe, enum fe_status *status) { struct cxd2820r_priv *priv = fe->demodulator_priv; int ret; diff --git a/drivers/media/dvb-frontends/dib3000mb.c b/drivers/media/dvb-frontends/dib3000mb.c index af91e0c92339..7a61172d0d45 100644 --- a/drivers/media/dvb-frontends/dib3000mb.c +++ b/drivers/media/dvb-frontends/dib3000mb.c @@ -118,7 +118,7 @@ static int dib3000mb_set_frontend(struct dvb_frontend *fe, int tuner) { struct dib3000_state* state = fe->demodulator_priv; struct dtv_frontend_properties *c = &fe->dtv_property_cache; - fe_code_rate_t fe_cr = FEC_NONE; + enum fe_code_rate fe_cr = FEC_NONE; int search_state, seq; if (tuner && fe->ops.tuner_ops.set_params) { @@ -454,7 +454,7 @@ static int dib3000mb_get_frontend(struct dvb_frontend* fe) { struct dtv_frontend_properties *c = &fe->dtv_property_cache; struct dib3000_state* state = fe->demodulator_priv; - fe_code_rate_t *cr; + enum fe_code_rate *cr; u16 tps_val; int inv_test1,inv_test2; u32 dds_val, threshold = 0x800000; @@ -611,7 +611,8 @@ static int dib3000mb_get_frontend(struct dvb_frontend* fe) return 0; } -static int dib3000mb_read_status(struct dvb_frontend* fe, fe_status_t *stat) +static int dib3000mb_read_status(struct dvb_frontend *fe, + enum fe_status *stat) { struct dib3000_state* state = fe->demodulator_priv; diff --git a/drivers/media/dvb-frontends/dib3000mc.c b/drivers/media/dvb-frontends/dib3000mc.c index a9b8081a0fbc..583d6b7fabed 100644 --- a/drivers/media/dvb-frontends/dib3000mc.c +++ 
b/drivers/media/dvb-frontends/dib3000mc.c @@ -736,7 +736,7 @@ static int dib3000mc_set_frontend(struct dvb_frontend *fe) return ret; } -static int dib3000mc_read_status(struct dvb_frontend *fe, fe_status_t *stat) +static int dib3000mc_read_status(struct dvb_frontend *fe, enum fe_status *stat) { struct dib3000mc_state *state = fe->demodulator_priv; u16 lock = dib3000mc_read_word(state, 509); diff --git a/drivers/media/dvb-frontends/dib7000m.c b/drivers/media/dvb-frontends/dib7000m.c index dcb9a15ef0c2..35eb71fe3c2b 100644 --- a/drivers/media/dvb-frontends/dib7000m.c +++ b/drivers/media/dvb-frontends/dib7000m.c @@ -1256,7 +1256,7 @@ static int dib7000m_set_frontend(struct dvb_frontend *fe) return ret; } -static int dib7000m_read_status(struct dvb_frontend *fe, fe_status_t *stat) +static int dib7000m_read_status(struct dvb_frontend *fe, enum fe_status *stat) { struct dib7000m_state *state = fe->demodulator_priv; u16 lock = dib7000m_read_word(state, 535); diff --git a/drivers/media/dvb-frontends/dib7000p.c b/drivers/media/dvb-frontends/dib7000p.c index c505d696f92d..33be5d6b9e10 100644 --- a/drivers/media/dvb-frontends/dib7000p.c +++ b/drivers/media/dvb-frontends/dib7000p.c @@ -1558,9 +1558,9 @@ static int dib7000p_set_frontend(struct dvb_frontend *fe) return ret; } -static int dib7000p_get_stats(struct dvb_frontend *fe, fe_status_t stat); +static int dib7000p_get_stats(struct dvb_frontend *fe, enum fe_status stat); -static int dib7000p_read_status(struct dvb_frontend *fe, fe_status_t * stat) +static int dib7000p_read_status(struct dvb_frontend *fe, enum fe_status *stat) { struct dib7000p_state *state = fe->demodulator_priv; u16 lock = dib7000p_read_word(state, 509); @@ -1877,7 +1877,7 @@ static u32 dib7000p_get_time_us(struct dvb_frontend *demod) return time_us; } -static int dib7000p_get_stats(struct dvb_frontend *demod, fe_status_t stat) +static int dib7000p_get_stats(struct dvb_frontend *demod, enum fe_status stat) { struct dib7000p_state *state = demod->demodulator_priv; struct dtv_frontend_properties *c = &demod->dtv_property_cache; diff --git a/drivers/media/dvb-frontends/dib8000.c b/drivers/media/dvb-frontends/dib8000.c index 8c6663b6399d..94c26270fff0 100644 --- a/drivers/media/dvb-frontends/dib8000.c +++ b/drivers/media/dvb-frontends/dib8000.c @@ -3380,13 +3380,13 @@ static int dib8000_sleep(struct dvb_frontend *fe) return dib8000_set_adc_state(state, DIBX000_SLOW_ADC_OFF) | dib8000_set_adc_state(state, DIBX000_ADC_OFF); } -static int dib8000_read_status(struct dvb_frontend *fe, fe_status_t * stat); +static int dib8000_read_status(struct dvb_frontend *fe, enum fe_status *stat); static int dib8000_get_frontend(struct dvb_frontend *fe) { struct dib8000_state *state = fe->demodulator_priv; u16 i, val = 0; - fe_status_t stat = 0; + enum fe_status stat = 0; u8 index_frontend, sub_index_frontend; fe->dtv_property_cache.bandwidth_hz = 6000000; @@ -3733,9 +3733,9 @@ static int dib8000_set_frontend(struct dvb_frontend *fe) return 0; } -static int dib8000_get_stats(struct dvb_frontend *fe, fe_status_t stat); +static int dib8000_get_stats(struct dvb_frontend *fe, enum fe_status stat); -static int dib8000_read_status(struct dvb_frontend *fe, fe_status_t * stat) +static int dib8000_read_status(struct dvb_frontend *fe, enum fe_status *stat) { struct dib8000_state *state = fe->demodulator_priv; u16 lock_slave = 0, lock; @@ -4089,7 +4089,7 @@ static u32 dib8000_get_time_us(struct dvb_frontend *fe, int layer) return time_us; } -static int dib8000_get_stats(struct dvb_frontend *fe, fe_status_t stat) 
+static int dib8000_get_stats(struct dvb_frontend *fe, enum fe_status stat) { struct dib8000_state *state = fe->demodulator_priv; struct dtv_frontend_properties *c = &state->fe[0]->dtv_property_cache; diff --git a/drivers/media/dvb-frontends/dib9000.c b/drivers/media/dvb-frontends/dib9000.c index f75dec443783..8f92aca0b073 100644 --- a/drivers/media/dvb-frontends/dib9000.c +++ b/drivers/media/dvb-frontends/dib9000.c @@ -1893,7 +1893,7 @@ static int dib9000_get_frontend(struct dvb_frontend *fe) { struct dib9000_state *state = fe->demodulator_priv; u8 index_frontend, sub_index_frontend; - fe_status_t stat; + enum fe_status stat; int ret = 0; if (state->get_frontend_internal == 0) { @@ -2161,7 +2161,7 @@ static u16 dib9000_read_lock(struct dvb_frontend *fe) return dib9000_read_word(state, 535); } -static int dib9000_read_status(struct dvb_frontend *fe, fe_status_t * stat) +static int dib9000_read_status(struct dvb_frontend *fe, enum fe_status *stat) { struct dib9000_state *state = fe->demodulator_priv; u8 index_frontend; diff --git a/drivers/media/dvb-frontends/drx39xyj/drxj.c b/drivers/media/dvb-frontends/drx39xyj/drxj.c index 52245354bf04..b28b5787b39a 100644 --- a/drivers/media/dvb-frontends/drx39xyj/drxj.c +++ b/drivers/media/dvb-frontends/drx39xyj/drxj.c @@ -11946,7 +11946,7 @@ static int drx39xxj_set_powerstate(struct dvb_frontend *fe, int enable) return 0; } -static int drx39xxj_read_status(struct dvb_frontend *fe, fe_status_t *status) +static int drx39xxj_read_status(struct dvb_frontend *fe, enum fe_status *status) { struct drx39xxj_state *state = fe->demodulator_priv; struct drx_demod_instance *demod = state->demod; diff --git a/drivers/media/dvb-frontends/drxd_hard.c b/drivers/media/dvb-frontends/drxd_hard.c index 687e893d29fe..34b9441840da 100644 --- a/drivers/media/dvb-frontends/drxd_hard.c +++ b/drivers/media/dvb-frontends/drxd_hard.c @@ -2805,7 +2805,7 @@ static int drxd_read_signal_strength(struct dvb_frontend *fe, u16 * strength) return 0; } -static int drxd_read_status(struct dvb_frontend *fe, fe_status_t * status) +static int drxd_read_status(struct dvb_frontend *fe, enum fe_status *status) { struct drxd_state *state = fe->demodulator_priv; u32 lock; diff --git a/drivers/media/dvb-frontends/drxk_hard.c b/drivers/media/dvb-frontends/drxk_hard.c index b1fc4bd44a2b..b975da099929 100644 --- a/drivers/media/dvb-frontends/drxk_hard.c +++ b/drivers/media/dvb-frontends/drxk_hard.c @@ -6640,7 +6640,7 @@ error: } -static int drxk_read_status(struct dvb_frontend *fe, fe_status_t *status) +static int drxk_read_status(struct dvb_frontend *fe, enum fe_status *status) { struct drxk_state *state = fe->demodulator_priv; int rc; diff --git a/drivers/media/dvb-frontends/drxk_hard.h b/drivers/media/dvb-frontends/drxk_hard.h index bae9c71dc3e9..9ed88e014942 100644 --- a/drivers/media/dvb-frontends/drxk_hard.h +++ b/drivers/media/dvb-frontends/drxk_hard.h @@ -350,7 +350,7 @@ struct drxk_state { bool antenna_dvbt; u16 antenna_gpio; - fe_status_t fe_status; + enum fe_status fe_status; /* Firmware */ const char *microcode_name; diff --git a/drivers/media/dvb-frontends/ds3000.c b/drivers/media/dvb-frontends/ds3000.c index 9d0d0347758f..e8fc0329ea64 100644 --- a/drivers/media/dvb-frontends/ds3000.c +++ b/drivers/media/dvb-frontends/ds3000.c @@ -404,7 +404,8 @@ static int ds3000_load_firmware(struct dvb_frontend *fe, return ret; } -static int ds3000_set_voltage(struct dvb_frontend *fe, fe_sec_voltage_t voltage) +static int ds3000_set_voltage(struct dvb_frontend *fe, + enum fe_sec_voltage voltage) { 
struct ds3000_state *state = fe->demodulator_priv; u8 data; @@ -431,7 +432,7 @@ static int ds3000_set_voltage(struct dvb_frontend *fe, fe_sec_voltage_t voltage) return 0; } -static int ds3000_read_status(struct dvb_frontend *fe, fe_status_t* status) +static int ds3000_read_status(struct dvb_frontend *fe, enum fe_status *status) { struct ds3000_state *state = fe->demodulator_priv; struct dtv_frontend_properties *c = &fe->dtv_property_cache; @@ -666,7 +667,7 @@ static int ds3000_read_ucblocks(struct dvb_frontend *fe, u32 *ucblocks) return 0; } -static int ds3000_set_tone(struct dvb_frontend *fe, fe_sec_tone_mode_t tone) +static int ds3000_set_tone(struct dvb_frontend *fe, enum fe_sec_tone_mode tone) { struct ds3000_state *state = fe->demodulator_priv; u8 data; @@ -766,7 +767,7 @@ static int ds3000_send_diseqc_msg(struct dvb_frontend *fe, /* Send DiSEqC burst */ static int ds3000_diseqc_send_burst(struct dvb_frontend *fe, - fe_sec_mini_cmd_t burst) + enum fe_sec_mini_cmd burst) { struct ds3000_state *state = fe->demodulator_priv; int i; @@ -905,7 +906,7 @@ static int ds3000_set_frontend(struct dvb_frontend *fe) struct dtv_frontend_properties *c = &fe->dtv_property_cache; int i; - fe_status_t status; + enum fe_status status; s32 offset_khz; u32 frequency; u16 value; @@ -1045,7 +1046,7 @@ static int ds3000_tune(struct dvb_frontend *fe, bool re_tune, unsigned int mode_flags, unsigned int *delay, - fe_status_t *status) + enum fe_status *status) { if (re_tune) { int ret = ds3000_set_frontend(fe); diff --git a/drivers/media/dvb-frontends/dvb_dummy_fe.c b/drivers/media/dvb-frontends/dvb_dummy_fe.c index d5acc304786b..14e996d45fac 100644 --- a/drivers/media/dvb-frontends/dvb_dummy_fe.c +++ b/drivers/media/dvb-frontends/dvb_dummy_fe.c @@ -33,7 +33,8 @@ struct dvb_dummy_fe_state { }; -static int dvb_dummy_fe_read_status(struct dvb_frontend* fe, fe_status_t* status) +static int dvb_dummy_fe_read_status(struct dvb_frontend *fe, + enum fe_status *status) { *status = FE_HAS_SIGNAL | FE_HAS_CARRIER @@ -97,12 +98,14 @@ static int dvb_dummy_fe_init(struct dvb_frontend* fe) return 0; } -static int dvb_dummy_fe_set_tone(struct dvb_frontend* fe, fe_sec_tone_mode_t tone) +static int dvb_dummy_fe_set_tone(struct dvb_frontend *fe, + enum fe_sec_tone_mode tone) { return 0; } -static int dvb_dummy_fe_set_voltage(struct dvb_frontend* fe, fe_sec_voltage_t voltage) +static int dvb_dummy_fe_set_voltage(struct dvb_frontend *fe, + enum fe_sec_voltage voltage) { return 0; } diff --git a/drivers/media/dvb-frontends/ec100.c b/drivers/media/dvb-frontends/ec100.c index 9d424809d06b..c9012e677cd1 100644 --- a/drivers/media/dvb-frontends/ec100.c +++ b/drivers/media/dvb-frontends/ec100.c @@ -174,7 +174,7 @@ static int ec100_get_tune_settings(struct dvb_frontend *fe, return 0; } -static int ec100_read_status(struct dvb_frontend *fe, fe_status_t *status) +static int ec100_read_status(struct dvb_frontend *fe, enum fe_status *status) { struct ec100_state *state = fe->demodulator_priv; int ret; diff --git a/drivers/media/dvb-frontends/hd29l2.c b/drivers/media/dvb-frontends/hd29l2.c index 67c8e6df42e8..40e359f2d17d 100644 --- a/drivers/media/dvb-frontends/hd29l2.c +++ b/drivers/media/dvb-frontends/hd29l2.c @@ -211,7 +211,7 @@ err: return ret; } -static int hd29l2_read_status(struct dvb_frontend *fe, fe_status_t *status) +static int hd29l2_read_status(struct dvb_frontend *fe, enum fe_status *status) { int ret; struct hd29l2_priv *priv = fe->demodulator_priv; diff --git a/drivers/media/dvb-frontends/hd29l2_priv.h 
b/drivers/media/dvb-frontends/hd29l2_priv.h index 4d571a2282d4..6dc225c4bc91 100644 --- a/drivers/media/dvb-frontends/hd29l2_priv.h +++ b/drivers/media/dvb-frontends/hd29l2_priv.h @@ -67,7 +67,7 @@ struct hd29l2_priv { struct hd29l2_config cfg; u8 tuner_i2c_addr_programmed:1; - fe_status_t fe_status; + enum fe_status fe_status; }; static const struct reg_mod_vals reg_mod_vals_tab[] = { diff --git a/drivers/media/dvb-frontends/isl6405.c b/drivers/media/dvb-frontends/isl6405.c index 0c642a5bf823..b46450a10b80 100644 --- a/drivers/media/dvb-frontends/isl6405.c +++ b/drivers/media/dvb-frontends/isl6405.c @@ -43,7 +43,8 @@ struct isl6405 { u8 i2c_addr; }; -static int isl6405_set_voltage(struct dvb_frontend *fe, fe_sec_voltage_t voltage) +static int isl6405_set_voltage(struct dvb_frontend *fe, + enum fe_sec_voltage voltage) { struct isl6405 *isl6405 = (struct isl6405 *) fe->sec_priv; struct i2c_msg msg = { .addr = isl6405->i2c_addr, .flags = 0, diff --git a/drivers/media/dvb-frontends/isl6421.c b/drivers/media/dvb-frontends/isl6421.c index c77002fcc8e2..3a4d4606a426 100644 --- a/drivers/media/dvb-frontends/isl6421.c +++ b/drivers/media/dvb-frontends/isl6421.c @@ -43,7 +43,8 @@ struct isl6421 { u8 i2c_addr; }; -static int isl6421_set_voltage(struct dvb_frontend *fe, fe_sec_voltage_t voltage) +static int isl6421_set_voltage(struct dvb_frontend *fe, + enum fe_sec_voltage voltage) { struct isl6421 *isl6421 = (struct isl6421 *) fe->sec_priv; struct i2c_msg msg = { .addr = isl6421->i2c_addr, .flags = 0, @@ -89,7 +90,8 @@ static int isl6421_enable_high_lnb_voltage(struct dvb_frontend *fe, long arg) return (i2c_transfer(isl6421->i2c, &msg, 1) == 1) ? 0 : -EIO; } -static int isl6421_set_tone(struct dvb_frontend* fe, fe_sec_tone_mode_t tone) +static int isl6421_set_tone(struct dvb_frontend *fe, + enum fe_sec_tone_mode tone) { struct isl6421 *isl6421 = (struct isl6421 *) fe->sec_priv; struct i2c_msg msg = { .addr = isl6421->i2c_addr, .flags = 0, diff --git a/drivers/media/dvb-frontends/l64781.c b/drivers/media/dvb-frontends/l64781.c index ddf866c46f8b..0977871232a2 100644 --- a/drivers/media/dvb-frontends/l64781.c +++ b/drivers/media/dvb-frontends/l64781.c @@ -359,7 +359,7 @@ static int get_frontend(struct dvb_frontend *fe) return 0; } -static int l64781_read_status(struct dvb_frontend* fe, fe_status_t* status) +static int l64781_read_status(struct dvb_frontend *fe, enum fe_status *status) { struct l64781_state* state = fe->demodulator_priv; int sync = l64781_readreg (state, 0x32); diff --git a/drivers/media/dvb-frontends/lg2160.c b/drivers/media/dvb-frontends/lg2160.c index 99efeba3c31a..7880f71ccd8a 100644 --- a/drivers/media/dvb-frontends/lg2160.c +++ b/drivers/media/dvb-frontends/lg2160.c @@ -1203,7 +1203,7 @@ static int lg216x_read_lock_status(struct lg216x_state *state, #endif } -static int lg216x_read_status(struct dvb_frontend *fe, fe_status_t *status) +static int lg216x_read_status(struct dvb_frontend *fe, enum fe_status *status) { struct lg216x_state *state = fe->demodulator_priv; int ret, acq_lock, sync_lock; diff --git a/drivers/media/dvb-frontends/lgdt3305.c b/drivers/media/dvb-frontends/lgdt3305.c index d08570af1c10..47121866163d 100644 --- a/drivers/media/dvb-frontends/lgdt3305.c +++ b/drivers/media/dvb-frontends/lgdt3305.c @@ -60,7 +60,7 @@ struct lgdt3305_state { struct dvb_frontend frontend; - fe_modulation_t current_modulation; + enum fe_modulation current_modulation; u32 current_frequency; u32 snr; }; @@ -912,7 +912,7 @@ fail: return ret; } -static int lgdt3305_read_status(struct 
dvb_frontend *fe, fe_status_t *status) +static int lgdt3305_read_status(struct dvb_frontend *fe, enum fe_status *status) { struct lgdt3305_state *state = fe->demodulator_priv; u8 val; diff --git a/drivers/media/dvb-frontends/lgdt3306a.c b/drivers/media/dvb-frontends/lgdt3306a.c index 0e2e43e9ede5..721fbc07e9ee 100644 --- a/drivers/media/dvb-frontends/lgdt3306a.c +++ b/drivers/media/dvb-frontends/lgdt3306a.c @@ -62,7 +62,7 @@ struct lgdt3306a_state { struct dvb_frontend frontend; - fe_modulation_t current_modulation; + enum fe_modulation current_modulation; u32 current_frequency; u32 snr; }; @@ -1558,7 +1558,8 @@ lgdt3306a_qam_lock_poll(struct lgdt3306a_state *state) return LG3306_UNLOCK; } -static int lgdt3306a_read_status(struct dvb_frontend *fe, fe_status_t *status) +static int lgdt3306a_read_status(struct dvb_frontend *fe, + enum fe_status *status) { struct lgdt3306a_state *state = fe->demodulator_priv; u16 strength = 0; @@ -1705,7 +1706,7 @@ static int lgdt3306a_read_ucblocks(struct dvb_frontend *fe, u32 *ucblocks) static int lgdt3306a_tune(struct dvb_frontend *fe, bool re_tune, unsigned int mode_flags, unsigned int *delay, - fe_status_t *status) + enum fe_status *status) { int ret = 0; struct lgdt3306a_state *state = fe->demodulator_priv; @@ -1735,7 +1736,7 @@ static int lgdt3306a_get_tune_settings(struct dvb_frontend *fe, static int lgdt3306a_search(struct dvb_frontend *fe) { - fe_status_t status = 0; + enum fe_status status = 0; int i, ret; /* set frontend */ diff --git a/drivers/media/dvb-frontends/lgdt330x.c b/drivers/media/dvb-frontends/lgdt330x.c index 2e1a61893fc1..cf3cc20510da 100644 --- a/drivers/media/dvb-frontends/lgdt330x.c +++ b/drivers/media/dvb-frontends/lgdt330x.c @@ -67,7 +67,7 @@ struct lgdt330x_state struct dvb_frontend frontend; /* Demodulator private data */ - fe_modulation_t current_modulation; + enum fe_modulation current_modulation; u32 snr; /* Result of last SNR calculation */ /* Tuner private data */ @@ -447,7 +447,8 @@ static int lgdt330x_get_frontend(struct dvb_frontend *fe) return 0; } -static int lgdt3302_read_status(struct dvb_frontend* fe, fe_status_t* status) +static int lgdt3302_read_status(struct dvb_frontend *fe, + enum fe_status *status) { struct lgdt330x_state* state = fe->demodulator_priv; u8 buf[3]; @@ -505,7 +506,8 @@ static int lgdt3302_read_status(struct dvb_frontend* fe, fe_status_t* status) return 0; } -static int lgdt3303_read_status(struct dvb_frontend* fe, fe_status_t* status) +static int lgdt3303_read_status(struct dvb_frontend *fe, + enum fe_status *status) { struct lgdt330x_state* state = fe->demodulator_priv; int err; diff --git a/drivers/media/dvb-frontends/lgs8gl5.c b/drivers/media/dvb-frontends/lgs8gl5.c index 416cce3fefc7..7bbb2c18c2dd 100644 --- a/drivers/media/dvb-frontends/lgs8gl5.c +++ b/drivers/media/dvb-frontends/lgs8gl5.c @@ -249,7 +249,7 @@ lgs8gl5_init(struct dvb_frontend *fe) static int -lgs8gl5_read_status(struct dvb_frontend *fe, fe_status_t *status) +lgs8gl5_read_status(struct dvb_frontend *fe, enum fe_status *status) { struct lgs8gl5_state *state = fe->demodulator_priv; u8 level = lgs8gl5_read_reg(state, REG_STRENGTH); diff --git a/drivers/media/dvb-frontends/lgs8gxx.c b/drivers/media/dvb-frontends/lgs8gxx.c index 3c92f36ea5c7..e2c191c8b196 100644 --- a/drivers/media/dvb-frontends/lgs8gxx.c +++ b/drivers/media/dvb-frontends/lgs8gxx.c @@ -732,7 +732,8 @@ int lgs8gxx_get_tune_settings(struct dvb_frontend *fe, return 0; } -static int lgs8gxx_read_status(struct dvb_frontend *fe, fe_status_t *fe_status) +static int 
lgs8gxx_read_status(struct dvb_frontend *fe, + enum fe_status *fe_status) { struct lgs8gxx_state *priv = fe->demodulator_priv; s8 ret; diff --git a/drivers/media/dvb-frontends/lnbp21.c b/drivers/media/dvb-frontends/lnbp21.c index f3ba7b5faa2e..4aca0fb9a8a7 100644 --- a/drivers/media/dvb-frontends/lnbp21.c +++ b/drivers/media/dvb-frontends/lnbp21.c @@ -45,7 +45,7 @@ struct lnbp21 { }; static int lnbp21_set_voltage(struct dvb_frontend *fe, - fe_sec_voltage_t voltage) + enum fe_sec_voltage voltage) { struct lnbp21 *lnbp21 = (struct lnbp21 *) fe->sec_priv; struct i2c_msg msg = { .addr = lnbp21->i2c_addr, .flags = 0, @@ -92,7 +92,7 @@ static int lnbp21_enable_high_lnb_voltage(struct dvb_frontend *fe, long arg) } static int lnbp21_set_tone(struct dvb_frontend *fe, - fe_sec_tone_mode_t tone) + enum fe_sec_tone_mode tone) { struct lnbp21 *lnbp21 = (struct lnbp21 *) fe->sec_priv; struct i2c_msg msg = { .addr = lnbp21->i2c_addr, .flags = 0, diff --git a/drivers/media/dvb-frontends/lnbp22.c b/drivers/media/dvb-frontends/lnbp22.c index c463da7f6dcc..d7ca0fdd0084 100644 --- a/drivers/media/dvb-frontends/lnbp22.c +++ b/drivers/media/dvb-frontends/lnbp22.c @@ -48,7 +48,8 @@ struct lnbp22 { struct i2c_adapter *i2c; }; -static int lnbp22_set_voltage(struct dvb_frontend *fe, fe_sec_voltage_t voltage) +static int lnbp22_set_voltage(struct dvb_frontend *fe, + enum fe_sec_voltage voltage) { struct lnbp22 *lnbp22 = (struct lnbp22 *)fe->sec_priv; struct i2c_msg msg = { diff --git a/drivers/media/dvb-frontends/m88ds3103.c b/drivers/media/dvb-frontends/m88ds3103.c index 7b21f1ad4542..c24b15238a8e 100644 --- a/drivers/media/dvb-frontends/m88ds3103.c +++ b/drivers/media/dvb-frontends/m88ds3103.c @@ -186,7 +186,8 @@ err: return ret; } -static int m88ds3103_read_status(struct dvb_frontend *fe, fe_status_t *status) +static int m88ds3103_read_status(struct dvb_frontend *fe, + enum fe_status *status) { struct m88ds3103_priv *priv = fe->demodulator_priv; struct dtv_frontend_properties *c = &fe->dtv_property_cache; @@ -1094,7 +1095,7 @@ static int m88ds3103_read_ber(struct dvb_frontend *fe, u32 *ber) } static int m88ds3103_set_tone(struct dvb_frontend *fe, - fe_sec_tone_mode_t fe_sec_tone_mode) + enum fe_sec_tone_mode fe_sec_tone_mode) { struct m88ds3103_priv *priv = fe->demodulator_priv; int ret; @@ -1141,7 +1142,7 @@ err: } static int m88ds3103_set_voltage(struct dvb_frontend *fe, - fe_sec_voltage_t fe_sec_voltage) + enum fe_sec_voltage fe_sec_voltage) { struct m88ds3103_priv *priv = fe->demodulator_priv; int ret; @@ -1268,7 +1269,7 @@ err: } static int m88ds3103_diseqc_send_burst(struct dvb_frontend *fe, - fe_sec_mini_cmd_t fe_sec_mini_cmd) + enum fe_sec_mini_cmd fe_sec_mini_cmd) { struct m88ds3103_priv *priv = fe->demodulator_priv; int ret; diff --git a/drivers/media/dvb-frontends/m88ds3103_priv.h b/drivers/media/dvb-frontends/m88ds3103_priv.h index 6217d928c23e..9d7d33430e40 100644 --- a/drivers/media/dvb-frontends/m88ds3103_priv.h +++ b/drivers/media/dvb-frontends/m88ds3103_priv.h @@ -38,8 +38,8 @@ struct m88ds3103_priv { struct m88ds3103_config config; const struct m88ds3103_config *cfg; struct dvb_frontend fe; - fe_delivery_system_t delivery_system; - fe_status_t fe_status; + enum fe_delivery_system delivery_system; + enum fe_status fe_status; u32 dvbv3_ber; /* for old DVBv3 API read_ber */ bool warm; /* FW running */ struct i2c_adapter *i2c_adapter; diff --git a/drivers/media/dvb-frontends/m88rs2000.c b/drivers/media/dvb-frontends/m88rs2000.c index d63bc9c13dce..9b6f464c48bd 100644 --- 
a/drivers/media/dvb-frontends/m88rs2000.c +++ b/drivers/media/dvb-frontends/m88rs2000.c @@ -41,7 +41,7 @@ struct m88rs2000_state { u8 no_lock_count; u32 tuner_frequency; u32 symbol_rate; - fe_code_rate_t fec_inner; + enum fe_code_rate fec_inner; u8 tuner_level; int errmode; }; @@ -247,7 +247,7 @@ static int m88rs2000_send_diseqc_msg(struct dvb_frontend *fe, } static int m88rs2000_send_diseqc_burst(struct dvb_frontend *fe, - fe_sec_mini_cmd_t burst) + enum fe_sec_mini_cmd burst) { struct m88rs2000_state *state = fe->demodulator_priv; u8 reg0, reg1; @@ -264,7 +264,8 @@ static int m88rs2000_send_diseqc_burst(struct dvb_frontend *fe, return 0; } -static int m88rs2000_set_tone(struct dvb_frontend *fe, fe_sec_tone_mode_t tone) +static int m88rs2000_set_tone(struct dvb_frontend *fe, + enum fe_sec_tone_mode tone) { struct m88rs2000_state *state = fe->demodulator_priv; u8 reg0, reg1; @@ -412,7 +413,8 @@ static int m88rs2000_tab_set(struct m88rs2000_state *state, return 0; } -static int m88rs2000_set_voltage(struct dvb_frontend *fe, fe_sec_voltage_t volt) +static int m88rs2000_set_voltage(struct dvb_frontend *fe, + enum fe_sec_voltage volt) { struct m88rs2000_state *state = fe->demodulator_priv; u8 data; @@ -462,7 +464,8 @@ static int m88rs2000_sleep(struct dvb_frontend *fe) return ret; } -static int m88rs2000_read_status(struct dvb_frontend *fe, fe_status_t *status) +static int m88rs2000_read_status(struct dvb_frontend *fe, + enum fe_status *status) { struct m88rs2000_state *state = fe->demodulator_priv; u8 reg = m88rs2000_readreg(state, 0x8c); @@ -539,7 +542,7 @@ static int m88rs2000_read_ucblocks(struct dvb_frontend *fe, u32 *ucblocks) } static int m88rs2000_set_fec(struct m88rs2000_state *state, - fe_code_rate_t fec) + enum fe_code_rate fec) { u8 fec_set, reg; int ret; @@ -574,7 +577,7 @@ static int m88rs2000_set_fec(struct m88rs2000_state *state, return ret; } -static fe_code_rate_t m88rs2000_get_fec(struct m88rs2000_state *state) +static enum fe_code_rate m88rs2000_get_fec(struct m88rs2000_state *state) { u8 reg; m88rs2000_writereg(state, 0x9a, 0x30); @@ -606,7 +609,7 @@ static int m88rs2000_set_frontend(struct dvb_frontend *fe) { struct m88rs2000_state *state = fe->demodulator_priv; struct dtv_frontend_properties *c = &fe->dtv_property_cache; - fe_status_t status; + enum fe_status status; int i, ret = 0; u32 tuner_freq; s16 offset = 0; diff --git a/drivers/media/dvb-frontends/mb86a16.c b/drivers/media/dvb-frontends/mb86a16.c index 3ddea4471d2b..79bc671e8769 100644 --- a/drivers/media/dvb-frontends/mb86a16.c +++ b/drivers/media/dvb-frontends/mb86a16.c @@ -593,7 +593,7 @@ err: return -EREMOTEIO; } -static int mb86a16_read_status(struct dvb_frontend *fe, fe_status_t *status) +static int mb86a16_read_status(struct dvb_frontend *fe, enum fe_status *status) { u8 stat, stat2; struct mb86a16_state *state = fe->demodulator_priv; @@ -1562,7 +1562,8 @@ err: return -EREMOTEIO; } -static int mb86a16_send_diseqc_burst(struct dvb_frontend *fe, fe_sec_mini_cmd_t burst) +static int mb86a16_send_diseqc_burst(struct dvb_frontend *fe, + enum fe_sec_mini_cmd burst) { struct mb86a16_state *state = fe->demodulator_priv; @@ -1590,7 +1591,7 @@ err: return -EREMOTEIO; } -static int mb86a16_set_tone(struct dvb_frontend *fe, fe_sec_tone_mode_t tone) +static int mb86a16_set_tone(struct dvb_frontend *fe, enum fe_sec_tone_mode tone) { struct mb86a16_state *state = fe->demodulator_priv; diff --git a/drivers/media/dvb-frontends/mb86a16.h b/drivers/media/dvb-frontends/mb86a16.h index e486dc0d8e60..dbd5f43fa128 100644 --- 
a/drivers/media/dvb-frontends/mb86a16.h +++ b/drivers/media/dvb-frontends/mb86a16.h @@ -28,7 +28,8 @@ struct mb86a16_config { u8 demod_address; - int (*set_voltage)(struct dvb_frontend *fe, fe_sec_voltage_t voltage); + int (*set_voltage)(struct dvb_frontend *fe, + enum fe_sec_voltage voltage); }; diff --git a/drivers/media/dvb-frontends/mb86a20s.c b/drivers/media/dvb-frontends/mb86a20s.c index 8f54c39ca63f..cfc005ee11d8 100644 --- a/drivers/media/dvb-frontends/mb86a20s.c +++ b/drivers/media/dvb-frontends/mb86a20s.c @@ -294,7 +294,7 @@ static int mb86a20s_i2c_readreg(struct mb86a20s_state *state, * The functions below assume that gateway lock has already obtained */ -static int mb86a20s_read_status(struct dvb_frontend *fe, fe_status_t *status) +static int mb86a20s_read_status(struct dvb_frontend *fe, enum fe_status *status) { struct mb86a20s_state *state = fe->demodulator_priv; int val; @@ -1951,7 +1951,7 @@ static int mb86a20s_set_frontend(struct dvb_frontend *fe) } static int mb86a20s_read_status_and_stats(struct dvb_frontend *fe, - fe_status_t *status) + enum fe_status *status) { struct mb86a20s_state *state = fe->demodulator_priv; int rc, status_nr; @@ -2042,7 +2042,7 @@ static int mb86a20s_tune(struct dvb_frontend *fe, bool re_tune, unsigned int mode_flags, unsigned int *delay, - fe_status_t *status) + enum fe_status *status) { struct mb86a20s_state *state = fe->demodulator_priv; int rc = 0; diff --git a/drivers/media/dvb-frontends/mt312.c b/drivers/media/dvb-frontends/mt312.c index 2163490c1e6b..c36e6764eead 100644 --- a/drivers/media/dvb-frontends/mt312.c +++ b/drivers/media/dvb-frontends/mt312.c @@ -156,7 +156,7 @@ static int mt312_reset(struct mt312_state *state, const u8 full) } static int mt312_get_inversion(struct mt312_state *state, - fe_spectral_inversion_t *i) + enum fe_spectral_inversion *i) { int ret; u8 vit_mode; @@ -225,9 +225,9 @@ static int mt312_get_symbol_rate(struct mt312_state *state, u32 *sr) return 0; } -static int mt312_get_code_rate(struct mt312_state *state, fe_code_rate_t *cr) +static int mt312_get_code_rate(struct mt312_state *state, enum fe_code_rate *cr) { - const fe_code_rate_t fec_tab[8] = + const enum fe_code_rate fec_tab[8] = { FEC_1_2, FEC_2_3, FEC_3_4, FEC_5_6, FEC_6_7, FEC_7_8, FEC_AUTO, FEC_AUTO }; @@ -380,7 +380,8 @@ static int mt312_send_master_cmd(struct dvb_frontend *fe, return 0; } -static int mt312_send_burst(struct dvb_frontend *fe, const fe_sec_mini_cmd_t c) +static int mt312_send_burst(struct dvb_frontend *fe, + const enum fe_sec_mini_cmd c) { struct mt312_state *state = fe->demodulator_priv; const u8 mini_tab[2] = { 0x02, 0x03 }; @@ -403,7 +404,8 @@ static int mt312_send_burst(struct dvb_frontend *fe, const fe_sec_mini_cmd_t c) return 0; } -static int mt312_set_tone(struct dvb_frontend *fe, const fe_sec_tone_mode_t t) +static int mt312_set_tone(struct dvb_frontend *fe, + const enum fe_sec_tone_mode t) { struct mt312_state *state = fe->demodulator_priv; const u8 tone_tab[2] = { 0x01, 0x00 }; @@ -426,7 +428,8 @@ static int mt312_set_tone(struct dvb_frontend *fe, const fe_sec_tone_mode_t t) return 0; } -static int mt312_set_voltage(struct dvb_frontend *fe, const fe_sec_voltage_t v) +static int mt312_set_voltage(struct dvb_frontend *fe, + const enum fe_sec_voltage v) { struct mt312_state *state = fe->demodulator_priv; const u8 volt_tab[3] = { 0x00, 0x40, 0x00 }; @@ -442,7 +445,7 @@ static int mt312_set_voltage(struct dvb_frontend *fe, const fe_sec_voltage_t v) return mt312_writereg(state, DISEQC_MODE, val); } -static int 
mt312_read_status(struct dvb_frontend *fe, fe_status_t *s) +static int mt312_read_status(struct dvb_frontend *fe, enum fe_status *s) { struct mt312_state *state = fe->demodulator_priv; int ret; diff --git a/drivers/media/dvb-frontends/mt352.c b/drivers/media/dvb-frontends/mt352.c index 2c3b50e828d7..123bb2f8e4b6 100644 --- a/drivers/media/dvb-frontends/mt352.c +++ b/drivers/media/dvb-frontends/mt352.c @@ -417,7 +417,7 @@ static int mt352_get_parameters(struct dvb_frontend* fe) return 0; } -static int mt352_read_status(struct dvb_frontend* fe, fe_status_t* status) +static int mt352_read_status(struct dvb_frontend *fe, enum fe_status *status) { struct mt352_state* state = fe->demodulator_priv; int s0, s1, s3; diff --git a/drivers/media/dvb-frontends/nxt200x.c b/drivers/media/dvb-frontends/nxt200x.c index 8a8e1ecb762d..79c3040912ab 100644 --- a/drivers/media/dvb-frontends/nxt200x.c +++ b/drivers/media/dvb-frontends/nxt200x.c @@ -781,7 +781,7 @@ static int nxt200x_setup_frontend_parameters(struct dvb_frontend *fe) return 0; } -static int nxt200x_read_status(struct dvb_frontend* fe, fe_status_t* status) +static int nxt200x_read_status(struct dvb_frontend *fe, enum fe_status *status) { struct nxt200x_state* state = fe->demodulator_priv; u8 lock; diff --git a/drivers/media/dvb-frontends/nxt6000.c b/drivers/media/dvb-frontends/nxt6000.c index 90ae6c72c0e3..73f9505367ac 100644 --- a/drivers/media/dvb-frontends/nxt6000.c +++ b/drivers/media/dvb-frontends/nxt6000.c @@ -109,7 +109,8 @@ static int nxt6000_set_bandwidth(struct nxt6000_state *state, u32 bandwidth) return nxt6000_writereg(state, OFDM_TRL_NOMINALRATE_2, (nominal_rate >> 8) & 0xFF); } -static int nxt6000_set_guard_interval(struct nxt6000_state* state, fe_guard_interval_t guard_interval) +static int nxt6000_set_guard_interval(struct nxt6000_state *state, + enum fe_guard_interval guard_interval) { switch (guard_interval) { @@ -131,7 +132,8 @@ static int nxt6000_set_guard_interval(struct nxt6000_state* state, fe_guard_inte } } -static int nxt6000_set_inversion(struct nxt6000_state* state, fe_spectral_inversion_t inversion) +static int nxt6000_set_inversion(struct nxt6000_state *state, + enum fe_spectral_inversion inversion) { switch (inversion) { @@ -147,7 +149,9 @@ static int nxt6000_set_inversion(struct nxt6000_state* state, fe_spectral_invers } } -static int nxt6000_set_transmission_mode(struct nxt6000_state* state, fe_transmit_mode_t transmission_mode) +static int +nxt6000_set_transmission_mode(struct nxt6000_state *state, + enum fe_transmit_mode transmission_mode) { int result; @@ -416,7 +420,7 @@ static void nxt6000_dump_status(struct nxt6000_state *state) printk("\n"); } -static int nxt6000_read_status(struct dvb_frontend* fe, fe_status_t* status) +static int nxt6000_read_status(struct dvb_frontend *fe, enum fe_status *status) { u8 core_status; struct nxt6000_state* state = fe->demodulator_priv; diff --git a/drivers/media/dvb-frontends/or51132.c b/drivers/media/dvb-frontends/or51132.c index cbbd259eacfe..35b1053b3640 100644 --- a/drivers/media/dvb-frontends/or51132.c +++ b/drivers/media/dvb-frontends/or51132.c @@ -63,7 +63,7 @@ struct or51132_state struct dvb_frontend frontend; /* Demodulator private data */ - fe_modulation_t current_modulation; + enum fe_modulation current_modulation; u32 snr; /* Result of last SNR calculation */ /* Tuner private data */ @@ -292,7 +292,7 @@ static int or51132_setmode(struct dvb_frontend* fe) #define MOD_FWCLASS_UNKNOWN 0 #define MOD_FWCLASS_VSB 1 #define MOD_FWCLASS_QAM 2 -static int 
modulation_fw_class(fe_modulation_t modulation) +static int modulation_fw_class(enum fe_modulation modulation) { switch(modulation) { case VSB_8: @@ -415,7 +415,7 @@ start: return 0; } -static int or51132_read_status(struct dvb_frontend* fe, fe_status_t* status) +static int or51132_read_status(struct dvb_frontend *fe, enum fe_status *status) { struct or51132_state* state = fe->demodulator_priv; int reg; diff --git a/drivers/media/dvb-frontends/or51211.c b/drivers/media/dvb-frontends/or51211.c index 873ea1da844b..e82413b975e6 100644 --- a/drivers/media/dvb-frontends/or51211.c +++ b/drivers/media/dvb-frontends/or51211.c @@ -237,7 +237,7 @@ static int or51211_set_parameters(struct dvb_frontend *fe) return 0; } -static int or51211_read_status(struct dvb_frontend* fe, fe_status_t* status) +static int or51211_read_status(struct dvb_frontend *fe, enum fe_status *status) { struct or51211_state* state = fe->demodulator_priv; unsigned char rec_buf[2]; diff --git a/drivers/media/dvb-frontends/rtl2830.c b/drivers/media/dvb-frontends/rtl2830.c index e1b8df62bd59..3d01f4f22aca 100644 --- a/drivers/media/dvb-frontends/rtl2830.c +++ b/drivers/media/dvb-frontends/rtl2830.c @@ -392,7 +392,7 @@ err: return ret; } -static int rtl2830_read_status(struct dvb_frontend *fe, fe_status_t *status) +static int rtl2830_read_status(struct dvb_frontend *fe, enum fe_status *status) { struct i2c_client *client = fe->demodulator_priv; struct rtl2830_dev *dev = i2c_get_clientdata(client); diff --git a/drivers/media/dvb-frontends/rtl2830_priv.h b/drivers/media/dvb-frontends/rtl2830_priv.h index d50d5376c9c5..cf793f39a09b 100644 --- a/drivers/media/dvb-frontends/rtl2830_priv.h +++ b/drivers/media/dvb-frontends/rtl2830_priv.h @@ -34,7 +34,7 @@ struct rtl2830_dev { bool sleeping; unsigned long filters; struct delayed_work stat_work; - fe_status_t fe_status; + enum fe_status fe_status; u64 post_bit_error_prev; /* for old DVBv3 read_ber() calculation */ u64 post_bit_error; u64 post_bit_count; diff --git a/drivers/media/dvb-frontends/rtl2832.c b/drivers/media/dvb-frontends/rtl2832.c index a57c478e2306..822ea4b7a7ff 100644 --- a/drivers/media/dvb-frontends/rtl2832.c +++ b/drivers/media/dvb-frontends/rtl2832.c @@ -688,7 +688,7 @@ err: return ret; } -static int rtl2832_read_status(struct dvb_frontend *fe, fe_status_t *status) +static int rtl2832_read_status(struct dvb_frontend *fe, enum fe_status *status) { struct rtl2832_dev *dev = fe->demodulator_priv; struct i2c_client *client = dev->client; diff --git a/drivers/media/dvb-frontends/rtl2832_priv.h b/drivers/media/dvb-frontends/rtl2832_priv.h index 6f3a49c63ab0..5dcd3a41d23f 100644 --- a/drivers/media/dvb-frontends/rtl2832_priv.h +++ b/drivers/media/dvb-frontends/rtl2832_priv.h @@ -39,7 +39,7 @@ struct rtl2832_dev { struct i2c_adapter *i2c_adapter_tuner; struct dvb_frontend fe; struct delayed_work stat_work; - fe_status_t fe_status; + enum fe_status fe_status; u64 post_bit_error_prev; /* for old DVBv3 read_ber() calculation */ u64 post_bit_error; u64 post_bit_count; diff --git a/drivers/media/dvb-frontends/s5h1409.c b/drivers/media/dvb-frontends/s5h1409.c index 5ff474a7ff29..10964848a2f1 100644 --- a/drivers/media/dvb-frontends/s5h1409.c +++ b/drivers/media/dvb-frontends/s5h1409.c @@ -38,7 +38,7 @@ struct s5h1409_state { struct dvb_frontend frontend; /* previous uncorrected block counter */ - fe_modulation_t current_modulation; + enum fe_modulation current_modulation; u32 current_frequency; int if_freq; @@ -400,7 +400,7 @@ static int s5h1409_set_spectralinversion(struct dvb_frontend 
*fe, int inverted) } static int s5h1409_enable_modulation(struct dvb_frontend *fe, - fe_modulation_t m) + enum fe_modulation m) { struct s5h1409_state *state = fe->demodulator_priv; @@ -755,7 +755,7 @@ static int s5h1409_init(struct dvb_frontend *fe) return 0; } -static int s5h1409_read_status(struct dvb_frontend *fe, fe_status_t *status) +static int s5h1409_read_status(struct dvb_frontend *fe, enum fe_status *status) { struct s5h1409_state *state = fe->demodulator_priv; u16 reg; diff --git a/drivers/media/dvb-frontends/s5h1411.c b/drivers/media/dvb-frontends/s5h1411.c index 64f35fed7ae1..9afc3f42290e 100644 --- a/drivers/media/dvb-frontends/s5h1411.c +++ b/drivers/media/dvb-frontends/s5h1411.c @@ -37,7 +37,7 @@ struct s5h1411_state { struct dvb_frontend frontend; - fe_modulation_t current_modulation; + enum fe_modulation current_modulation; unsigned int first_tune:1; u32 current_frequency; @@ -484,7 +484,7 @@ static int s5h1411_set_serialmode(struct dvb_frontend *fe, int serial) } static int s5h1411_enable_modulation(struct dvb_frontend *fe, - fe_modulation_t m) + enum fe_modulation m) { struct s5h1411_state *state = fe->demodulator_priv; @@ -659,7 +659,7 @@ static int s5h1411_init(struct dvb_frontend *fe) return 0; } -static int s5h1411_read_status(struct dvb_frontend *fe, fe_status_t *status) +static int s5h1411_read_status(struct dvb_frontend *fe, enum fe_status *status) { struct s5h1411_state *state = fe->demodulator_priv; u16 reg; diff --git a/drivers/media/dvb-frontends/s5h1420.c b/drivers/media/dvb-frontends/s5h1420.c index dfc20665e372..9c22a4c70d87 100644 --- a/drivers/media/dvb-frontends/s5h1420.c +++ b/drivers/media/dvb-frontends/s5h1420.c @@ -52,7 +52,7 @@ struct s5h1420_state { u8 postlocked:1; u32 fclk; u32 tunedfreq; - fe_code_rate_t fec_inner; + enum fe_code_rate fec_inner; u32 symbol_rate; /* FIXME: ugly workaround for flexcop's incapable i2c-controller @@ -124,7 +124,8 @@ static int s5h1420_writereg (struct s5h1420_state* state, u8 reg, u8 data) return 0; } -static int s5h1420_set_voltage (struct dvb_frontend* fe, fe_sec_voltage_t voltage) +static int s5h1420_set_voltage(struct dvb_frontend *fe, + enum fe_sec_voltage voltage) { struct s5h1420_state* state = fe->demodulator_priv; @@ -149,7 +150,8 @@ static int s5h1420_set_voltage (struct dvb_frontend* fe, fe_sec_voltage_t voltag return 0; } -static int s5h1420_set_tone (struct dvb_frontend* fe, fe_sec_tone_mode_t tone) +static int s5h1420_set_tone(struct dvb_frontend *fe, + enum fe_sec_tone_mode tone) { struct s5h1420_state* state = fe->demodulator_priv; @@ -270,7 +272,8 @@ exit: return result; } -static int s5h1420_send_burst (struct dvb_frontend* fe, fe_sec_mini_cmd_t minicmd) +static int s5h1420_send_burst(struct dvb_frontend *fe, + enum fe_sec_mini_cmd minicmd) { struct s5h1420_state* state = fe->demodulator_priv; u8 val; @@ -307,10 +310,10 @@ static int s5h1420_send_burst (struct dvb_frontend* fe, fe_sec_mini_cmd_t minicm return result; } -static fe_status_t s5h1420_get_status_bits(struct s5h1420_state* state) +static enum fe_status s5h1420_get_status_bits(struct s5h1420_state *state) { u8 val; - fe_status_t status = 0; + enum fe_status status = 0; val = s5h1420_readreg(state, 0x14); if (val & 0x02) @@ -328,7 +331,8 @@ static fe_status_t s5h1420_get_status_bits(struct s5h1420_state* state) return status; } -static int s5h1420_read_status(struct dvb_frontend* fe, fe_status_t* status) +static int s5h1420_read_status(struct dvb_frontend *fe, + enum fe_status *status) { struct s5h1420_state* state = fe->demodulator_priv; 
u8 val; @@ -601,7 +605,7 @@ static void s5h1420_setfec_inversion(struct s5h1420_state* state, dprintk("leave %s\n", __func__); } -static fe_code_rate_t s5h1420_getfec(struct s5h1420_state* state) +static enum fe_code_rate s5h1420_getfec(struct s5h1420_state *state) { switch(s5h1420_readreg(state, 0x32) & 0x07) { case 0: @@ -626,7 +630,8 @@ static fe_code_rate_t s5h1420_getfec(struct s5h1420_state* state) return FEC_NONE; } -static fe_spectral_inversion_t s5h1420_getinversion(struct s5h1420_state* state) +static enum fe_spectral_inversion +s5h1420_getinversion(struct s5h1420_state *state) { if (s5h1420_readreg(state, 0x32) & 0x08) return INVERSION_ON; diff --git a/drivers/media/dvb-frontends/s5h1432.c b/drivers/media/dvb-frontends/s5h1432.c index 6ec16a243741..4215652f8eb7 100644 --- a/drivers/media/dvb-frontends/s5h1432.c +++ b/drivers/media/dvb-frontends/s5h1432.c @@ -36,7 +36,7 @@ struct s5h1432_state { struct dvb_frontend frontend; - fe_modulation_t current_modulation; + enum fe_modulation current_modulation; unsigned int first_tune:1; u32 current_frequency; @@ -302,7 +302,7 @@ static int s5h1432_init(struct dvb_frontend *fe) return 0; } -static int s5h1432_read_status(struct dvb_frontend *fe, fe_status_t *status) +static int s5h1432_read_status(struct dvb_frontend *fe, enum fe_status *status) { return 0; } diff --git a/drivers/media/dvb-frontends/s921.c b/drivers/media/dvb-frontends/s921.c index 69862e1fd9e9..b2d9fe13e1a0 100644 --- a/drivers/media/dvb-frontends/s921.c +++ b/drivers/media/dvb-frontends/s921.c @@ -348,7 +348,7 @@ static int s921_initfe(struct dvb_frontend *fe) return 0; } -static int s921_read_status(struct dvb_frontend *fe, fe_status_t *status) +static int s921_read_status(struct dvb_frontend *fe, enum fe_status *status) { struct s921_state *state = fe->demodulator_priv; int regstatus, rc; @@ -389,7 +389,7 @@ static int s921_read_status(struct dvb_frontend *fe, fe_status_t *status) static int s921_read_signal_strength(struct dvb_frontend *fe, u16 *strength) { - fe_status_t status; + enum fe_status status; struct s921_state *state = fe->demodulator_priv; int rc; @@ -449,7 +449,7 @@ static int s921_tune(struct dvb_frontend *fe, bool re_tune, unsigned int mode_flags, unsigned int *delay, - fe_status_t *status) + enum fe_status *status) { int rc = 0; diff --git a/drivers/media/dvb-frontends/si2165.c b/drivers/media/dvb-frontends/si2165.c index 4cc5d10ed0d4..7c2eeee69757 100644 --- a/drivers/media/dvb-frontends/si2165.c +++ b/drivers/media/dvb-frontends/si2165.c @@ -698,7 +698,7 @@ static int si2165_sleep(struct dvb_frontend *fe) return 0; } -static int si2165_read_status(struct dvb_frontend *fe, fe_status_t *status) +static int si2165_read_status(struct dvb_frontend *fe, enum fe_status *status) { int ret; u8 fec_lock = 0; diff --git a/drivers/media/dvb-frontends/si2168.c b/drivers/media/dvb-frontends/si2168.c index d6a4cb0688c3..25e238c370e5 100644 --- a/drivers/media/dvb-frontends/si2168.c +++ b/drivers/media/dvb-frontends/si2168.c @@ -120,7 +120,7 @@ static int si2168_cmd_execute(struct i2c_client *client, struct si2168_cmd *cmd) return ret; } -static int si2168_read_status(struct dvb_frontend *fe, fe_status_t *status) +static int si2168_read_status(struct dvb_frontend *fe, enum fe_status *status) { struct i2c_client *client = fe->demodulator_priv; struct si2168_dev *dev = i2c_get_clientdata(client); diff --git a/drivers/media/dvb-frontends/si2168_priv.h b/drivers/media/dvb-frontends/si2168_priv.h index 90b6b6eace24..c07e6fe2cb10 100644 --- 
a/drivers/media/dvb-frontends/si2168_priv.h +++ b/drivers/media/dvb-frontends/si2168_priv.h @@ -31,8 +31,8 @@ struct si2168_dev { struct i2c_adapter *adapter; struct dvb_frontend fe; - fe_delivery_system_t delivery_system; - fe_status_t fe_status; + enum fe_delivery_system delivery_system; + enum fe_status fe_status; bool active; bool fw_loaded; u8 ts_mode; diff --git a/drivers/media/dvb-frontends/si21xx.c b/drivers/media/dvb-frontends/si21xx.c index 16850e2bf02f..62ad7a7be9f8 100644 --- a/drivers/media/dvb-frontends/si21xx.c +++ b/drivers/media/dvb-frontends/si21xx.c @@ -410,7 +410,7 @@ static int si21xx_send_diseqc_msg(struct dvb_frontend *fe, } static int si21xx_send_diseqc_burst(struct dvb_frontend *fe, - fe_sec_mini_cmd_t burst) + enum fe_sec_mini_cmd burst) { struct si21xx_state *state = fe->demodulator_priv; u8 val; @@ -434,7 +434,7 @@ static int si21xx_send_diseqc_burst(struct dvb_frontend *fe, return 0; } /* 30.06.2008 */ -static int si21xx_set_tone(struct dvb_frontend *fe, fe_sec_tone_mode_t tone) +static int si21xx_set_tone(struct dvb_frontend *fe, enum fe_sec_tone_mode tone) { struct si21xx_state *state = fe->demodulator_priv; u8 val; @@ -454,7 +454,7 @@ static int si21xx_set_tone(struct dvb_frontend *fe, fe_sec_tone_mode_t tone) } } -static int si21xx_set_voltage(struct dvb_frontend *fe, fe_sec_voltage_t volt) +static int si21xx_set_voltage(struct dvb_frontend *fe, enum fe_sec_voltage volt) { struct si21xx_state *state = fe->demodulator_priv; @@ -536,7 +536,7 @@ static int si21xx_init(struct dvb_frontend *fe) } -static int si21_read_status(struct dvb_frontend *fe, fe_status_t *status) +static int si21_read_status(struct dvb_frontend *fe, enum fe_status *status) { struct si21xx_state *state = fe->demodulator_priv; u8 regs_read[2]; @@ -641,7 +641,7 @@ static int si21_read_ucblocks(struct dvb_frontend *fe, u32 *ucblocks) /* initiates a channel acquisition sequence using the specified symbol rate and code rate */ static int si21xx_setacquire(struct dvb_frontend *fe, int symbrate, - fe_code_rate_t crate) + enum fe_code_rate crate) { struct si21xx_state *state = fe->demodulator_priv; diff --git a/drivers/media/dvb-frontends/sp8870.c b/drivers/media/dvb-frontends/sp8870.c index 57dc2abaa87b..e87ac30d7fb8 100644 --- a/drivers/media/dvb-frontends/sp8870.c +++ b/drivers/media/dvb-frontends/sp8870.c @@ -350,7 +350,8 @@ static int sp8870_init (struct dvb_frontend* fe) return 0; } -static int sp8870_read_status (struct dvb_frontend* fe, fe_status_t * fe_status) +static int sp8870_read_status(struct dvb_frontend *fe, + enum fe_status *fe_status) { struct sp8870_state* state = fe->demodulator_priv; int status; diff --git a/drivers/media/dvb-frontends/sp887x.c b/drivers/media/dvb-frontends/sp887x.c index 1bb81b5ae6e0..4378fe1b978e 100644 --- a/drivers/media/dvb-frontends/sp887x.c +++ b/drivers/media/dvb-frontends/sp887x.c @@ -416,7 +416,7 @@ static int sp887x_setup_frontend_parameters(struct dvb_frontend *fe) return 0; } -static int sp887x_read_status(struct dvb_frontend* fe, fe_status_t* status) +static int sp887x_read_status(struct dvb_frontend *fe, enum fe_status *status) { struct sp887x_state* state = fe->demodulator_priv; u16 snr12 = sp887x_readreg(state, 0xf16); diff --git a/drivers/media/dvb-frontends/stb0899_drv.c b/drivers/media/dvb-frontends/stb0899_drv.c index c73899d3a53d..756650f154ab 100644 --- a/drivers/media/dvb-frontends/stb0899_drv.c +++ b/drivers/media/dvb-frontends/stb0899_drv.c @@ -792,7 +792,8 @@ static int stb0899_wait_diseqc_txidle(struct stb0899_state *state, int 
timeout) return 0; } -static int stb0899_send_diseqc_burst(struct dvb_frontend *fe, fe_sec_mini_cmd_t burst) +static int stb0899_send_diseqc_burst(struct dvb_frontend *fe, + enum fe_sec_mini_cmd burst) { struct stb0899_state *state = fe->demodulator_priv; u8 reg, old_state; @@ -1178,7 +1179,8 @@ static int stb0899_read_ber(struct dvb_frontend *fe, u32 *ber) return 0; } -static int stb0899_set_voltage(struct dvb_frontend *fe, fe_sec_voltage_t voltage) +static int stb0899_set_voltage(struct dvb_frontend *fe, + enum fe_sec_voltage voltage) { struct stb0899_state *state = fe->demodulator_priv; @@ -1205,7 +1207,7 @@ static int stb0899_set_voltage(struct dvb_frontend *fe, fe_sec_voltage_t voltage return 0; } -static int stb0899_set_tone(struct dvb_frontend *fe, fe_sec_tone_mode_t tone) +static int stb0899_set_tone(struct dvb_frontend *fe, enum fe_sec_tone_mode tone) { struct stb0899_state *state = fe->demodulator_priv; struct stb0899_internal *internal = &state->internal; diff --git a/drivers/media/dvb-frontends/stv0288.c b/drivers/media/dvb-frontends/stv0288.c index ecf4bb3a3b6b..c93d9a45f7f7 100644 --- a/drivers/media/dvb-frontends/stv0288.c +++ b/drivers/media/dvb-frontends/stv0288.c @@ -44,7 +44,7 @@ struct stv0288_state { u8 initialised:1; u32 tuner_frequency; u32 symbol_rate; - fe_code_rate_t fec_inner; + enum fe_code_rate fec_inner; int errmode; }; @@ -174,7 +174,7 @@ static int stv0288_send_diseqc_msg(struct dvb_frontend *fe, } static int stv0288_send_diseqc_burst(struct dvb_frontend *fe, - fe_sec_mini_cmd_t burst) + enum fe_sec_mini_cmd burst) { struct stv0288_state *state = fe->demodulator_priv; @@ -193,7 +193,7 @@ static int stv0288_send_diseqc_burst(struct dvb_frontend *fe, return 0; } -static int stv0288_set_tone(struct dvb_frontend *fe, fe_sec_tone_mode_t tone) +static int stv0288_set_tone(struct dvb_frontend *fe, enum fe_sec_tone_mode tone) { struct stv0288_state *state = fe->demodulator_priv; @@ -323,7 +323,8 @@ static u8 stv0288_inittab[] = { 0xff, 0xff, }; -static int stv0288_set_voltage(struct dvb_frontend *fe, fe_sec_voltage_t volt) +static int stv0288_set_voltage(struct dvb_frontend *fe, + enum fe_sec_voltage volt) { dprintk("%s: %s\n", __func__, volt == SEC_VOLTAGE_13 ? 
"SEC_VOLTAGE_13" : @@ -361,7 +362,7 @@ static int stv0288_init(struct dvb_frontend *fe) return 0; } -static int stv0288_read_status(struct dvb_frontend *fe, fe_status_t *status) +static int stv0288_read_status(struct dvb_frontend *fe, enum fe_status *status) { struct stv0288_state *state = fe->demodulator_priv; diff --git a/drivers/media/dvb-frontends/stv0297.c b/drivers/media/dvb-frontends/stv0297.c index dfc14d5c3999..75b4d8b25657 100644 --- a/drivers/media/dvb-frontends/stv0297.c +++ b/drivers/media/dvb-frontends/stv0297.c @@ -233,7 +233,8 @@ static void stv0297_set_initialdemodfreq(struct stv0297_state *state, long freq) stv0297_writereg(state, 0x20, tmp); } -static int stv0297_set_qam(struct stv0297_state *state, fe_modulation_t modulation) +static int stv0297_set_qam(struct stv0297_state *state, + enum fe_modulation modulation) { int val = 0; @@ -267,7 +268,8 @@ static int stv0297_set_qam(struct stv0297_state *state, fe_modulation_t modulati return 0; } -static int stv0297_set_inversion(struct stv0297_state *state, fe_spectral_inversion_t inversion) +static int stv0297_set_inversion(struct stv0297_state *state, + enum fe_spectral_inversion inversion) { int val = 0; @@ -325,7 +327,8 @@ static int stv0297_sleep(struct dvb_frontend *fe) return 0; } -static int stv0297_read_status(struct dvb_frontend *fe, fe_status_t * status) +static int stv0297_read_status(struct dvb_frontend *fe, + enum fe_status *status) { struct stv0297_state *state = fe->demodulator_priv; @@ -415,7 +418,7 @@ static int stv0297_set_frontend(struct dvb_frontend *fe) int sweeprate; int carrieroffset; unsigned long timeout; - fe_spectral_inversion_t inversion; + enum fe_spectral_inversion inversion; switch (p->modulation) { case QAM_16: diff --git a/drivers/media/dvb-frontends/stv0299.c b/drivers/media/dvb-frontends/stv0299.c index b57ecf42e75a..0ca5d9f0d851 100644 --- a/drivers/media/dvb-frontends/stv0299.c +++ b/drivers/media/dvb-frontends/stv0299.c @@ -61,7 +61,7 @@ struct stv0299_state { u8 initialised:1; u32 tuner_frequency; u32 symbol_rate; - fe_code_rate_t fec_inner; + enum fe_code_rate fec_inner; int errmode; u32 ucblocks; u8 mcr_reg; @@ -134,7 +134,7 @@ static int stv0299_readregs (struct stv0299_state* state, u8 reg1, u8 *b, u8 len return ret == 2 ? 
0 : ret; } -static int stv0299_set_FEC (struct stv0299_state* state, fe_code_rate_t fec) +static int stv0299_set_FEC(struct stv0299_state *state, enum fe_code_rate fec) { dprintk ("%s\n", __func__); @@ -170,10 +170,10 @@ static int stv0299_set_FEC (struct stv0299_state* state, fe_code_rate_t fec) } } -static fe_code_rate_t stv0299_get_fec (struct stv0299_state* state) +static enum fe_code_rate stv0299_get_fec(struct stv0299_state *state) { - static fe_code_rate_t fec_tab [] = { FEC_2_3, FEC_3_4, FEC_5_6, - FEC_7_8, FEC_1_2 }; + static enum fe_code_rate fec_tab[] = { FEC_2_3, FEC_3_4, FEC_5_6, + FEC_7_8, FEC_1_2 }; u8 index; dprintk ("%s\n", __func__); @@ -302,7 +302,8 @@ static int stv0299_send_diseqc_msg (struct dvb_frontend* fe, return 0; } -static int stv0299_send_diseqc_burst (struct dvb_frontend* fe, fe_sec_mini_cmd_t burst) +static int stv0299_send_diseqc_burst(struct dvb_frontend *fe, + enum fe_sec_mini_cmd burst) { struct stv0299_state* state = fe->demodulator_priv; u8 val; @@ -329,7 +330,8 @@ static int stv0299_send_diseqc_burst (struct dvb_frontend* fe, fe_sec_mini_cmd_t return 0; } -static int stv0299_set_tone (struct dvb_frontend* fe, fe_sec_tone_mode_t tone) +static int stv0299_set_tone(struct dvb_frontend *fe, + enum fe_sec_tone_mode tone) { struct stv0299_state* state = fe->demodulator_priv; u8 val; @@ -351,7 +353,8 @@ static int stv0299_set_tone (struct dvb_frontend* fe, fe_sec_tone_mode_t tone) } } -static int stv0299_set_voltage (struct dvb_frontend* fe, fe_sec_voltage_t voltage) +static int stv0299_set_voltage(struct dvb_frontend *fe, + enum fe_sec_voltage voltage) { struct stv0299_state* state = fe->demodulator_priv; u8 reg0x08; @@ -476,7 +479,8 @@ static int stv0299_init (struct dvb_frontend* fe) return 0; } -static int stv0299_read_status(struct dvb_frontend* fe, fe_status_t* status) +static int stv0299_read_status(struct dvb_frontend *fe, + enum fe_status *status) { struct stv0299_state* state = fe->demodulator_priv; diff --git a/drivers/media/dvb-frontends/stv0367.c b/drivers/media/dvb-frontends/stv0367.c index b31ff265ff24..ec3e18e5ff50 100644 --- a/drivers/media/dvb-frontends/stv0367.c +++ b/drivers/media/dvb-frontends/stv0367.c @@ -59,7 +59,7 @@ struct stv0367cab_state { int locked; /* channel found */ u32 freq_khz; /* found frequency (in kHz) */ u32 symbol_rate; /* found symbol rate (in Bds) */ - fe_spectral_inversion_t spect_inv; /* Spectrum Inversion */ + enum fe_spectral_inversion spect_inv; /* Spectrum Inversion */ }; struct stv0367ter_state { @@ -67,10 +67,10 @@ struct stv0367ter_state { enum stv0367_ter_signal_type state; enum stv0367_ter_if_iq_mode if_iq_mode; enum stv0367_ter_mode mode;/* mode 2K or 8K */ - fe_guard_interval_t guard; + enum fe_guard_interval guard; enum stv0367_ter_hierarchy hierarchy; u32 frequency; - fe_spectral_inversion_t sense; /* current search spectrum */ + enum fe_spectral_inversion sense; /* current search spectrum */ u8 force; /* force mode/guard */ u8 bw; /* channel width 6, 7 or 8 in MHz */ u8 pBW; /* channel width used during previous lock */ @@ -2074,7 +2074,8 @@ static int stv0367ter_status(struct dvb_frontend *fe) return locked; } #endif -static int stv0367ter_read_status(struct dvb_frontend *fe, fe_status_t *status) +static int stv0367ter_read_status(struct dvb_frontend *fe, + enum fe_status *status) { struct stv0367_state *state = fe->demodulator_priv; @@ -2716,7 +2717,8 @@ static u32 stv0367cab_GetSymbolRate(struct stv0367_state *state, u32 mclk_hz) return regsym; } -static int stv0367cab_read_status(struct 
dvb_frontend *fe, fe_status_t *status) +static int stv0367cab_read_status(struct dvb_frontend *fe, + enum fe_status *status) { struct stv0367_state *state = fe->demodulator_priv; diff --git a/drivers/media/dvb-frontends/stv0367_priv.h b/drivers/media/dvb-frontends/stv0367_priv.h index 995db0689ddd..89bf6f64b078 100644 --- a/drivers/media/dvb-frontends/stv0367_priv.h +++ b/drivers/media/dvb-frontends/stv0367_priv.h @@ -188,7 +188,7 @@ struct stv0367_cab_signal_info { u32 frequency; /* kHz */ u32 symbol_rate; /* Mbds */ enum stv0367cab_mod modulation; - fe_spectral_inversion_t spect_inv; + enum fe_spectral_inversion spect_inv; s32 Power_dBmx10; /* Power of the RF signal (dBm x 10) */ u32 CN_dBx10; /* Carrier to noise ratio (dB x 10) */ u32 BER; /* Bit error rate (x 10000000) */ diff --git a/drivers/media/dvb-frontends/stv0900_core.c b/drivers/media/dvb-frontends/stv0900_core.c index 2c88abfab531..fe31dd541955 100644 --- a/drivers/media/dvb-frontends/stv0900_core.c +++ b/drivers/media/dvb-frontends/stv0900_core.c @@ -1744,7 +1744,8 @@ static int stv0900_send_master_cmd(struct dvb_frontend *fe, state->demod); } -static int stv0900_send_burst(struct dvb_frontend *fe, fe_sec_mini_cmd_t burst) +static int stv0900_send_burst(struct dvb_frontend *fe, + enum fe_sec_mini_cmd burst) { struct stv0900_state *state = fe->demodulator_priv; struct stv0900_internal *intp = state->internal; @@ -1793,7 +1794,8 @@ static int stv0900_recv_slave_reply(struct dvb_frontend *fe, return 0; } -static int stv0900_set_tone(struct dvb_frontend *fe, fe_sec_tone_mode_t toneoff) +static int stv0900_set_tone(struct dvb_frontend *fe, + enum fe_sec_tone_mode toneoff) { struct stv0900_state *state = fe->demodulator_priv; struct stv0900_internal *intp = state->internal; diff --git a/drivers/media/dvb-frontends/stv090x.c b/drivers/media/dvb-frontends/stv090x.c index 0b2a934f53e5..25bdf6e0f963 100644 --- a/drivers/media/dvb-frontends/stv090x.c +++ b/drivers/media/dvb-frontends/stv090x.c @@ -3732,7 +3732,7 @@ static int stv090x_read_cnr(struct dvb_frontend *fe, u16 *cnr) return 0; } -static int stv090x_set_tone(struct dvb_frontend *fe, fe_sec_tone_mode_t tone) +static int stv090x_set_tone(struct dvb_frontend *fe, enum fe_sec_tone_mode tone) { struct stv090x_state *state = fe->demodulator_priv; u32 reg; @@ -3822,7 +3822,8 @@ err: return -1; } -static int stv090x_send_diseqc_burst(struct dvb_frontend *fe, fe_sec_mini_cmd_t burst) +static int stv090x_send_diseqc_burst(struct dvb_frontend *fe, + enum fe_sec_mini_cmd burst) { struct stv090x_state *state = fe->demodulator_priv; u32 reg, idle = 0, fifo_full = 1; diff --git a/drivers/media/dvb-frontends/stv6110.c b/drivers/media/dvb-frontends/stv6110.c index b1425830a24e..91c6dcf65d2a 100644 --- a/drivers/media/dvb-frontends/stv6110.c +++ b/drivers/media/dvb-frontends/stv6110.c @@ -158,7 +158,7 @@ static int stv6110_sleep(struct dvb_frontend *fe) return 0; } -static u32 carrier_width(u32 symbol_rate, fe_rolloff_t rolloff) +static u32 carrier_width(u32 symbol_rate, enum fe_rolloff rolloff) { u32 rlf; diff --git a/drivers/media/dvb-frontends/tc90522.c b/drivers/media/dvb-frontends/tc90522.c index dce22ce35d20..456cdc7fb1e7 100644 --- a/drivers/media/dvb-frontends/tc90522.c +++ b/drivers/media/dvb-frontends/tc90522.c @@ -130,7 +130,7 @@ static int tc90522t_set_layers(struct dvb_frontend *fe) /* frontend ops */ -static int tc90522s_read_status(struct dvb_frontend *fe, fe_status_t *status) +static int tc90522s_read_status(struct dvb_frontend *fe, enum fe_status *status) { struct tc90522_state 
*state; int ret; @@ -158,7 +158,7 @@ static int tc90522s_read_status(struct dvb_frontend *fe, fe_status_t *status) return 0; } -static int tc90522t_read_status(struct dvb_frontend *fe, fe_status_t *status) +static int tc90522t_read_status(struct dvb_frontend *fe, enum fe_status *status) { struct tc90522_state *state; int ret; @@ -194,7 +194,7 @@ static int tc90522t_read_status(struct dvb_frontend *fe, fe_status_t *status) return 0; } -static const fe_code_rate_t fec_conv_sat[] = { +static const enum fe_code_rate fec_conv_sat[] = { FEC_NONE, /* unused */ FEC_1_2, /* for BPSK */ FEC_1_2, FEC_2_3, FEC_3_4, FEC_5_6, FEC_7_8, /* for QPSK */ @@ -238,7 +238,10 @@ static int tc90522s_get_frontend(struct dvb_frontend *fe) c->layer[1].segment_count = 0; else c->layer[1].segment_count = val[4] & 0x3f; /* slots */ - /* actually, BPSK if v==1, but not defined in fe_modulation_t */ + /* + * actually, BPSK if v==1, but not defined in + * enum fe_modulation + */ c->layer[1].modulation = QPSK; layers = (v > 0) ? 2 : 1; } @@ -319,18 +322,18 @@ static int tc90522s_get_frontend(struct dvb_frontend *fe) } -static const fe_transmit_mode_t tm_conv[] = { +static const enum fe_transmit_mode tm_conv[] = { TRANSMISSION_MODE_2K, TRANSMISSION_MODE_4K, TRANSMISSION_MODE_8K, 0 }; -static const fe_code_rate_t fec_conv_ter[] = { +static const enum fe_code_rate fec_conv_ter[] = { FEC_1_2, FEC_2_3, FEC_3_4, FEC_5_6, FEC_7_8, 0, 0, 0 }; -static const fe_modulation_t mod_conv[] = { +static const enum fe_modulation mod_conv[] = { DQPSK, QPSK, QAM_16, QAM_64, 0, 0, 0, 0 }; diff --git a/drivers/media/dvb-frontends/tda10021.c b/drivers/media/dvb-frontends/tda10021.c index 28d987068048..a684424e665a 100644 --- a/drivers/media/dvb-frontends/tda10021.c +++ b/drivers/media/dvb-frontends/tda10021.c @@ -129,8 +129,8 @@ static int unlock_tuner(struct tda10021_state* state) return 0; } -static int tda10021_setup_reg0 (struct tda10021_state* state, u8 reg0, - fe_spectral_inversion_t inversion) +static int tda10021_setup_reg0(struct tda10021_state *state, u8 reg0, + enum fe_spectral_inversion inversion) { reg0 |= state->reg0 & 0x63; @@ -308,7 +308,8 @@ static int tda10021_set_parameters(struct dvb_frontend *fe) return 0; } -static int tda10021_read_status(struct dvb_frontend* fe, fe_status_t* status) +static int tda10021_read_status(struct dvb_frontend *fe, + enum fe_status *status) { struct tda10021_state* state = fe->demodulator_priv; int sync; diff --git a/drivers/media/dvb-frontends/tda10023.c b/drivers/media/dvb-frontends/tda10023.c index f92fbbbb4a71..44a55656093f 100644 --- a/drivers/media/dvb-frontends/tda10023.c +++ b/drivers/media/dvb-frontends/tda10023.c @@ -376,7 +376,8 @@ static int tda10023_set_parameters(struct dvb_frontend *fe) return 0; } -static int tda10023_read_status(struct dvb_frontend* fe, fe_status_t* status) +static int tda10023_read_status(struct dvb_frontend *fe, + enum fe_status *status) { struct tda10023_state* state = fe->demodulator_priv; int sync; diff --git a/drivers/media/dvb-frontends/tda10048.c b/drivers/media/dvb-frontends/tda10048.c index 71fb63299de7..8451086c563f 100644 --- a/drivers/media/dvb-frontends/tda10048.c +++ b/drivers/media/dvb-frontends/tda10048.c @@ -792,7 +792,7 @@ static int tda10048_init(struct dvb_frontend *fe) return ret; } -static int tda10048_read_status(struct dvb_frontend *fe, fe_status_t *status) +static int tda10048_read_status(struct dvb_frontend *fe, enum fe_status *status) { struct tda10048_state *state = fe->demodulator_priv; u8 reg; diff --git 
a/drivers/media/dvb-frontends/tda1004x.c b/drivers/media/dvb-frontends/tda1004x.c index d2b8ecbea81e..0e209b56c76c 100644 --- a/drivers/media/dvb-frontends/tda1004x.c +++ b/drivers/media/dvb-frontends/tda1004x.c @@ -1005,7 +1005,8 @@ static int tda1004x_get_fe(struct dvb_frontend *fe) return 0; } -static int tda1004x_read_status(struct dvb_frontend* fe, fe_status_t * fe_status) +static int tda1004x_read_status(struct dvb_frontend *fe, + enum fe_status *fe_status) { struct tda1004x_state* state = fe->demodulator_priv; int status; diff --git a/drivers/media/dvb-frontends/tda10071.c b/drivers/media/dvb-frontends/tda10071.c index 1470a5d63f58..f6dc6307d35a 100644 --- a/drivers/media/dvb-frontends/tda10071.c +++ b/drivers/media/dvb-frontends/tda10071.c @@ -203,7 +203,7 @@ error: } static int tda10071_set_tone(struct dvb_frontend *fe, - fe_sec_tone_mode_t fe_sec_tone_mode) + enum fe_sec_tone_mode fe_sec_tone_mode) { struct tda10071_priv *priv = fe->demodulator_priv; struct tda10071_cmd cmd; @@ -249,7 +249,7 @@ error: } static int tda10071_set_voltage(struct dvb_frontend *fe, - fe_sec_voltage_t fe_sec_voltage) + enum fe_sec_voltage fe_sec_voltage) { struct tda10071_priv *priv = fe->demodulator_priv; struct tda10071_cmd cmd; @@ -413,7 +413,7 @@ error: } static int tda10071_diseqc_send_burst(struct dvb_frontend *fe, - fe_sec_mini_cmd_t fe_sec_mini_cmd) + enum fe_sec_mini_cmd fe_sec_mini_cmd) { struct tda10071_priv *priv = fe->demodulator_priv; struct tda10071_cmd cmd; @@ -476,7 +476,7 @@ error: return ret; } -static int tda10071_read_status(struct dvb_frontend *fe, fe_status_t *status) +static int tda10071_read_status(struct dvb_frontend *fe, enum fe_status *status) { struct tda10071_priv *priv = fe->demodulator_priv; int ret; @@ -668,7 +668,7 @@ static int tda10071_set_frontend(struct dvb_frontend *fe) struct dtv_frontend_properties *c = &fe->dtv_property_cache; int ret, i; u8 mode, rolloff, pilot, inversion, div; - fe_modulation_t modulation; + enum fe_modulation modulation; dev_dbg(&priv->i2c->dev, "%s: delivery_system=%d modulation=%d frequency=%d symbol_rate=%d inversion=%d pilot=%d rolloff=%d\n", diff --git a/drivers/media/dvb-frontends/tda10071_priv.h b/drivers/media/dvb-frontends/tda10071_priv.h index 7ec69ac510d7..54d7c713eec8 100644 --- a/drivers/media/dvb-frontends/tda10071_priv.h +++ b/drivers/media/dvb-frontends/tda10071_priv.h @@ -34,15 +34,15 @@ struct tda10071_priv { u8 meas_count[2]; u32 ber; u32 ucb; - fe_status_t fe_status; - fe_delivery_system_t delivery_system; + enum fe_status fe_status; + enum fe_delivery_system delivery_system; bool warm; /* FW running */ }; static struct tda10071_modcod { - fe_delivery_system_t delivery_system; - fe_modulation_t modulation; - fe_code_rate_t fec; + enum fe_delivery_system delivery_system; + enum fe_modulation modulation; + enum fe_code_rate fec; u8 val; } TDA10071_MODCOD[] = { /* NBC-QPSK */ diff --git a/drivers/media/dvb-frontends/tda10086.c b/drivers/media/dvb-frontends/tda10086.c index f1a752187d08..95a33e187f8e 100644 --- a/drivers/media/dvb-frontends/tda10086.c +++ b/drivers/media/dvb-frontends/tda10086.c @@ -185,7 +185,8 @@ static void tda10086_diseqc_wait(struct tda10086_state *state) } } -static int tda10086_set_tone (struct dvb_frontend* fe, fe_sec_tone_mode_t tone) +static int tda10086_set_tone(struct dvb_frontend *fe, + enum fe_sec_tone_mode tone) { struct tda10086_state* state = fe->demodulator_priv; u8 t22k_off = 0x80; @@ -238,7 +239,8 @@ static int tda10086_send_master_cmd (struct dvb_frontend* fe, return 0; } -static int 
tda10086_send_burst (struct dvb_frontend* fe, fe_sec_mini_cmd_t minicmd) +static int tda10086_send_burst(struct dvb_frontend *fe, + enum fe_sec_mini_cmd minicmd) { struct tda10086_state* state = fe->demodulator_priv; u8 oldval = tda10086_read_byte(state, 0x36); @@ -551,7 +553,8 @@ static int tda10086_get_frontend(struct dvb_frontend *fe) return 0; } -static int tda10086_read_status(struct dvb_frontend* fe, fe_status_t *fe_status) +static int tda10086_read_status(struct dvb_frontend *fe, + enum fe_status *fe_status) { struct tda10086_state* state = fe->demodulator_priv; u8 val; diff --git a/drivers/media/dvb-frontends/tda8083.c b/drivers/media/dvb-frontends/tda8083.c index 69e62f42e2e1..796543fa2c8d 100644 --- a/drivers/media/dvb-frontends/tda8083.c +++ b/drivers/media/dvb-frontends/tda8083.c @@ -97,7 +97,8 @@ static inline u8 tda8083_readreg (struct tda8083_state* state, u8 reg) return val; } -static int tda8083_set_inversion (struct tda8083_state* state, fe_spectral_inversion_t inversion) +static int tda8083_set_inversion(struct tda8083_state *state, + enum fe_spectral_inversion inversion) { /* XXX FIXME: implement other modes than FEC_AUTO */ if (inversion == INVERSION_AUTO) @@ -106,7 +107,7 @@ static int tda8083_set_inversion (struct tda8083_state* state, fe_spectral_inver return -EINVAL; } -static int tda8083_set_fec (struct tda8083_state* state, fe_code_rate_t fec) +static int tda8083_set_fec(struct tda8083_state *state, enum fe_code_rate fec) { if (fec == FEC_AUTO) return tda8083_writereg (state, 0x07, 0xff); @@ -117,11 +118,13 @@ static int tda8083_set_fec (struct tda8083_state* state, fe_code_rate_t fec) return -EINVAL; } -static fe_code_rate_t tda8083_get_fec (struct tda8083_state* state) +static enum fe_code_rate tda8083_get_fec(struct tda8083_state *state) { u8 index; - static fe_code_rate_t fec_tab [] = { FEC_8_9, FEC_1_2, FEC_2_3, FEC_3_4, - FEC_4_5, FEC_5_6, FEC_6_7, FEC_7_8 }; + static enum fe_code_rate fec_tab[] = { + FEC_8_9, FEC_1_2, FEC_2_3, FEC_3_4, + FEC_4_5, FEC_5_6, FEC_6_7, FEC_7_8 + }; index = tda8083_readreg(state, 0x0e) & 0x07; @@ -178,7 +181,8 @@ static void tda8083_wait_diseqc_fifo (struct tda8083_state* state, int timeout) } } -static int tda8083_set_tone (struct tda8083_state* state, fe_sec_tone_mode_t tone) +static int tda8083_set_tone(struct tda8083_state *state, + enum fe_sec_tone_mode tone) { tda8083_writereg (state, 0x26, 0xf1); @@ -192,7 +196,8 @@ static int tda8083_set_tone (struct tda8083_state* state, fe_sec_tone_mode_t ton } } -static int tda8083_set_voltage (struct tda8083_state* state, fe_sec_voltage_t voltage) +static int tda8083_set_voltage(struct tda8083_state *state, + enum fe_sec_voltage voltage) { switch (voltage) { case SEC_VOLTAGE_13: @@ -204,7 +209,8 @@ static int tda8083_set_voltage (struct tda8083_state* state, fe_sec_voltage_t vo } } -static int tda8083_send_diseqc_burst (struct tda8083_state* state, fe_sec_mini_cmd_t burst) +static int tda8083_send_diseqc_burst(struct tda8083_state *state, + enum fe_sec_mini_cmd burst) { switch (burst) { case SEC_MINI_A: @@ -222,8 +228,8 @@ static int tda8083_send_diseqc_burst (struct tda8083_state* state, fe_sec_mini_c return 0; } -static int tda8083_send_diseqc_msg (struct dvb_frontend* fe, - struct dvb_diseqc_master_cmd *m) +static int tda8083_send_diseqc_msg(struct dvb_frontend *fe, + struct dvb_diseqc_master_cmd *m) { struct tda8083_state* state = fe->demodulator_priv; int i; @@ -240,7 +246,8 @@ static int tda8083_send_diseqc_msg (struct dvb_frontend* fe, return 0; } -static int 
tda8083_read_status(struct dvb_frontend* fe, fe_status_t* status) +static int tda8083_read_status(struct dvb_frontend *fe, + enum fe_status *status) { struct tda8083_state* state = fe->demodulator_priv; @@ -372,7 +379,8 @@ static int tda8083_init(struct dvb_frontend* fe) return 0; } -static int tda8083_diseqc_send_burst(struct dvb_frontend* fe, fe_sec_mini_cmd_t burst) +static int tda8083_diseqc_send_burst(struct dvb_frontend *fe, + enum fe_sec_mini_cmd burst) { struct tda8083_state* state = fe->demodulator_priv; @@ -383,7 +391,8 @@ static int tda8083_diseqc_send_burst(struct dvb_frontend* fe, fe_sec_mini_cmd_t return 0; } -static int tda8083_diseqc_set_tone(struct dvb_frontend* fe, fe_sec_tone_mode_t tone) +static int tda8083_diseqc_set_tone(struct dvb_frontend *fe, + enum fe_sec_tone_mode tone) { struct tda8083_state* state = fe->demodulator_priv; @@ -394,7 +403,8 @@ static int tda8083_diseqc_set_tone(struct dvb_frontend* fe, fe_sec_tone_mode_t t return 0; } -static int tda8083_diseqc_set_voltage(struct dvb_frontend* fe, fe_sec_voltage_t voltage) +static int tda8083_diseqc_set_voltage(struct dvb_frontend *fe, + enum fe_sec_voltage voltage) { struct tda8083_state* state = fe->demodulator_priv; diff --git a/drivers/media/dvb-frontends/ves1820.c b/drivers/media/dvb-frontends/ves1820.c index bb42b563c42d..aacfdda3e005 100644 --- a/drivers/media/dvb-frontends/ves1820.c +++ b/drivers/media/dvb-frontends/ves1820.c @@ -90,7 +90,8 @@ static u8 ves1820_readreg(struct ves1820_state *state, u8 reg) return b1[0]; } -static int ves1820_setup_reg0(struct ves1820_state *state, u8 reg0, fe_spectral_inversion_t inversion) +static int ves1820_setup_reg0(struct ves1820_state *state, + u8 reg0, enum fe_spectral_inversion inversion) { reg0 |= state->reg0 & 0x62; @@ -237,7 +238,8 @@ static int ves1820_set_parameters(struct dvb_frontend *fe) return 0; } -static int ves1820_read_status(struct dvb_frontend* fe, fe_status_t* status) +static int ves1820_read_status(struct dvb_frontend *fe, + enum fe_status *status) { struct ves1820_state* state = fe->demodulator_priv; int sync; diff --git a/drivers/media/dvb-frontends/ves1x93.c b/drivers/media/dvb-frontends/ves1x93.c index 9c17eacaec24..526952396422 100644 --- a/drivers/media/dvb-frontends/ves1x93.c +++ b/drivers/media/dvb-frontends/ves1x93.c @@ -41,7 +41,7 @@ struct ves1x93_state { struct dvb_frontend frontend; /* previous uncorrected block counter */ - fe_spectral_inversion_t inversion; + enum fe_spectral_inversion inversion; u8 *init_1x93_tab; u8 *init_1x93_wtab; u8 tab_size; @@ -130,7 +130,8 @@ static int ves1x93_clr_bit (struct ves1x93_state* state) return 0; } -static int ves1x93_set_inversion (struct ves1x93_state* state, fe_spectral_inversion_t inversion) +static int ves1x93_set_inversion(struct ves1x93_state *state, + enum fe_spectral_inversion inversion) { u8 val; @@ -156,7 +157,7 @@ static int ves1x93_set_inversion (struct ves1x93_state* state, fe_spectral_inver return ves1x93_writereg (state, 0x0c, (state->init_1x93_tab[0x0c] & 0x3f) | val); } -static int ves1x93_set_fec (struct ves1x93_state* state, fe_code_rate_t fec) +static int ves1x93_set_fec(struct ves1x93_state *state, enum fe_code_rate fec) { if (fec == FEC_AUTO) return ves1x93_writereg (state, 0x0d, 0x08); @@ -166,7 +167,7 @@ static int ves1x93_set_fec (struct ves1x93_state* state, fe_code_rate_t fec) return ves1x93_writereg (state, 0x0d, fec - FEC_1_2); } -static fe_code_rate_t ves1x93_get_fec (struct ves1x93_state* state) +static enum fe_code_rate ves1x93_get_fec(struct ves1x93_state *state) 
{ return FEC_1_2 + ((ves1x93_readreg (state, 0x0d) >> 4) & 0x7); } @@ -281,7 +282,8 @@ static int ves1x93_init (struct dvb_frontend* fe) return 0; } -static int ves1x93_set_voltage (struct dvb_frontend* fe, fe_sec_voltage_t voltage) +static int ves1x93_set_voltage(struct dvb_frontend *fe, + enum fe_sec_voltage voltage) { struct ves1x93_state* state = fe->demodulator_priv; @@ -297,7 +299,8 @@ static int ves1x93_set_voltage (struct dvb_frontend* fe, fe_sec_voltage_t voltag } } -static int ves1x93_read_status(struct dvb_frontend* fe, fe_status_t* status) +static int ves1x93_read_status(struct dvb_frontend *fe, + enum fe_status *status) { struct ves1x93_state* state = fe->demodulator_priv; diff --git a/drivers/media/dvb-frontends/zl10353.c b/drivers/media/dvb-frontends/zl10353.c index 4e62a6611847..ef9764a02d4c 100644 --- a/drivers/media/dvb-frontends/zl10353.c +++ b/drivers/media/dvb-frontends/zl10353.c @@ -462,7 +462,7 @@ static int zl10353_get_parameters(struct dvb_frontend *fe) return 0; } -static int zl10353_read_status(struct dvb_frontend *fe, fe_status_t *status) +static int zl10353_read_status(struct dvb_frontend *fe, enum fe_status *status) { struct zl10353_state *state = fe->demodulator_priv; int s6, s7, s8; diff --git a/drivers/media/firewire/firedtv-fe.c b/drivers/media/firewire/firedtv-fe.c index 6fe9793b98b3..17acda6bcb6e 100644 --- a/drivers/media/firewire/firedtv-fe.c +++ b/drivers/media/firewire/firedtv-fe.c @@ -61,12 +61,12 @@ static int fdtv_diseqc_send_master_cmd(struct dvb_frontend *fe, } static int fdtv_diseqc_send_burst(struct dvb_frontend *fe, - fe_sec_mini_cmd_t minicmd) + enum fe_sec_mini_cmd minicmd) { return 0; } -static int fdtv_set_tone(struct dvb_frontend *fe, fe_sec_tone_mode_t tone) +static int fdtv_set_tone(struct dvb_frontend *fe, enum fe_sec_tone_mode tone) { struct firedtv *fdtv = fe->sec_priv; @@ -75,7 +75,7 @@ static int fdtv_set_tone(struct dvb_frontend *fe, fe_sec_tone_mode_t tone) } static int fdtv_set_voltage(struct dvb_frontend *fe, - fe_sec_voltage_t voltage) + enum fe_sec_voltage voltage) { struct firedtv *fdtv = fe->sec_priv; @@ -83,7 +83,7 @@ static int fdtv_set_voltage(struct dvb_frontend *fe, return 0; } -static int fdtv_read_status(struct dvb_frontend *fe, fe_status_t *status) +static int fdtv_read_status(struct dvb_frontend *fe, enum fe_status *status) { struct firedtv *fdtv = fe->sec_priv; struct firedtv_tuner_status stat; diff --git a/drivers/media/firewire/firedtv.h b/drivers/media/firewire/firedtv.h index 346a85be6de2..345d1eda8c05 100644 --- a/drivers/media/firewire/firedtv.h +++ b/drivers/media/firewire/firedtv.h @@ -99,8 +99,8 @@ struct firedtv { s8 isochannel; struct fdtv_ir_context *ir_context; - fe_sec_voltage_t voltage; - fe_sec_tone_mode_t tone; + enum fe_sec_voltage voltage; + enum fe_sec_tone_mode tone; struct mutex demux_mutex; unsigned long channel_active; diff --git a/drivers/media/pci/bt8xx/dst.c b/drivers/media/pci/bt8xx/dst.c index f2261dfe5d1a..4a90eee5e3bb 100644 --- a/drivers/media/pci/bt8xx/dst.c +++ b/drivers/media/pci/bt8xx/dst.c @@ -425,7 +425,8 @@ static int dst_set_bandwidth(struct dst_state *state, u32 bandwidth) return 0; } -static int dst_set_inversion(struct dst_state *state, fe_spectral_inversion_t inversion) +static int dst_set_inversion(struct dst_state *state, + enum fe_spectral_inversion inversion) { state->inversion = inversion; switch (inversion) { @@ -442,13 +443,13 @@ static int dst_set_inversion(struct dst_state *state, fe_spectral_inversion_t in return 0; } -static int dst_set_fec(struct dst_state 
*state, fe_code_rate_t fec) +static int dst_set_fec(struct dst_state *state, enum fe_code_rate fec) { state->fec = fec; return 0; } -static fe_code_rate_t dst_get_fec(struct dst_state *state) +static enum fe_code_rate dst_get_fec(struct dst_state *state) { return state->fec; } @@ -499,7 +500,8 @@ static int dst_set_symbolrate(struct dst_state *state, u32 srate) return 0; } -static int dst_set_modulation(struct dst_state *state, fe_modulation_t modulation) +static int dst_set_modulation(struct dst_state *state, + enum fe_modulation modulation) { if (state->dst_type != DST_TYPE_IS_CABLE) return -EOPNOTSUPP; @@ -536,7 +538,7 @@ static int dst_set_modulation(struct dst_state *state, fe_modulation_t modulatio return 0; } -static fe_modulation_t dst_get_modulation(struct dst_state *state) +static enum fe_modulation dst_get_modulation(struct dst_state *state) { return state->modulation; } @@ -1376,7 +1378,8 @@ static int dst_get_tuna(struct dst_state *state) return 1; } -static int dst_set_voltage(struct dvb_frontend *fe, fe_sec_voltage_t voltage); +static int dst_set_voltage(struct dvb_frontend *fe, + enum fe_sec_voltage voltage); static int dst_write_tuna(struct dvb_frontend *fe) { @@ -1466,7 +1469,7 @@ static int dst_set_diseqc(struct dvb_frontend *fe, struct dvb_diseqc_master_cmd return dst_command(state, paket, 8); } -static int dst_set_voltage(struct dvb_frontend *fe, fe_sec_voltage_t voltage) +static int dst_set_voltage(struct dvb_frontend *fe, enum fe_sec_voltage voltage) { int need_cmd, retval = 0; struct dst_state *state = fe->demodulator_priv; @@ -1500,7 +1503,7 @@ static int dst_set_voltage(struct dvb_frontend *fe, fe_sec_voltage_t voltage) return retval; } -static int dst_set_tone(struct dvb_frontend *fe, fe_sec_tone_mode_t tone) +static int dst_set_tone(struct dvb_frontend *fe, enum fe_sec_tone_mode tone) { struct dst_state *state = fe->demodulator_priv; @@ -1525,7 +1528,7 @@ static int dst_set_tone(struct dvb_frontend *fe, fe_sec_tone_mode_t tone) return dst_tone_power_cmd(state); } -static int dst_send_burst(struct dvb_frontend *fe, fe_sec_mini_cmd_t minicmd) +static int dst_send_burst(struct dvb_frontend *fe, enum fe_sec_mini_cmd minicmd) { struct dst_state *state = fe->demodulator_priv; @@ -1575,7 +1578,7 @@ static int bt8xx_dst_init(struct dvb_frontend *fe) return 0; } -static int dst_read_status(struct dvb_frontend *fe, fe_status_t *status) +static int dst_read_status(struct dvb_frontend *fe, enum fe_status *status) { struct dst_state *state = fe->demodulator_priv; @@ -1646,7 +1649,7 @@ static int dst_tune_frontend(struct dvb_frontend* fe, bool re_tune, unsigned int mode_flags, unsigned int *delay, - fe_status_t *status) + enum fe_status *status) { struct dst_state *state = fe->demodulator_priv; struct dtv_frontend_properties *p = &fe->dtv_property_cache; diff --git a/drivers/media/pci/bt8xx/dst_common.h b/drivers/media/pci/bt8xx/dst_common.h index d70d98f1a571..6a2cfdd44e3e 100644 --- a/drivers/media/pci/bt8xx/dst_common.h +++ b/drivers/media/pci/bt8xx/dst_common.h @@ -113,11 +113,11 @@ struct dst_state { u8 dst_type; u32 type_flags; u32 frequency; /* intermediate frequency in kHz for QPSK */ - fe_spectral_inversion_t inversion; + enum fe_spectral_inversion inversion; u32 symbol_rate; /* symbol rate in Symbols per second */ - fe_code_rate_t fec; - fe_sec_voltage_t voltage; - fe_sec_tone_mode_t tone; + enum fe_code_rate fec; + enum fe_sec_voltage voltage; + enum fe_sec_tone_mode tone; u32 decode_freq; u8 decode_lock; u16 decode_strength; @@ -127,8 +127,8 @@ struct dst_state { 
u32 bandwidth; u32 dst_hw_cap; u8 dst_fw_version; - fe_sec_mini_cmd_t minicmd; - fe_modulation_t modulation; + enum fe_sec_mini_cmd minicmd; + enum fe_modulation modulation; u8 messages[256]; u8 mac_address[8]; u8 fw_version[8]; diff --git a/drivers/media/pci/cx23885/cx23885-dvb.c b/drivers/media/pci/cx23885/cx23885-dvb.c index 9f377ad5e845..a77c2d3b50fb 100644 --- a/drivers/media/pci/cx23885/cx23885-dvb.c +++ b/drivers/media/pci/cx23885/cx23885-dvb.c @@ -572,7 +572,8 @@ static struct stb6100_config prof_8000_stb6100_config = { .refclock = 27000000, }; -static int p8000_set_voltage(struct dvb_frontend *fe, fe_sec_voltage_t voltage) +static int p8000_set_voltage(struct dvb_frontend *fe, + enum fe_sec_voltage voltage) { struct cx23885_tsport *port = fe->dvb->priv; struct cx23885_dev *dev = port->dev; @@ -587,7 +588,7 @@ static int p8000_set_voltage(struct dvb_frontend *fe, fe_sec_voltage_t voltage) } static int dvbsky_t9580_set_voltage(struct dvb_frontend *fe, - fe_sec_voltage_t voltage) + enum fe_sec_voltage voltage) { struct cx23885_tsport *port = fe->dvb->priv; struct cx23885_dev *dev = port->dev; @@ -616,7 +617,7 @@ static int dvbsky_t9580_set_voltage(struct dvb_frontend *fe, } static int dvbsky_s952_portc_set_voltage(struct dvb_frontend *fe, - fe_sec_voltage_t voltage) + enum fe_sec_voltage voltage) { struct cx23885_tsport *port = fe->dvb->priv; struct cx23885_dev *dev = port->dev; @@ -1186,7 +1187,8 @@ static int dvb_register(struct cx23885_tsport *port) struct i2c_client *client_demod = NULL, *client_tuner = NULL; struct i2c_client *client_sec = NULL; const struct m88ds3103_config *p_m88ds3103_config = NULL; - int (*p_set_voltage)(struct dvb_frontend *fe, fe_sec_voltage_t voltage) = NULL; + int (*p_set_voltage)(struct dvb_frontend *fe, + enum fe_sec_voltage voltage) = NULL; int mfe_shared = 0; /* bus not shared by default */ int ret; diff --git a/drivers/media/pci/cx23885/cx23885-f300.c b/drivers/media/pci/cx23885/cx23885-f300.c index 6f817d8732da..a6c45eb0a105 100644 --- a/drivers/media/pci/cx23885/cx23885-f300.c +++ b/drivers/media/pci/cx23885/cx23885-f300.c @@ -144,7 +144,7 @@ static u8 f300_xfer(struct dvb_frontend *fe, u8 *buf) return ret; } -int f300_set_voltage(struct dvb_frontend *fe, fe_sec_voltage_t voltage) +int f300_set_voltage(struct dvb_frontend *fe, enum fe_sec_voltage voltage) { u8 buf[16]; diff --git a/drivers/media/pci/cx23885/cx23885-f300.h b/drivers/media/pci/cx23885/cx23885-f300.h index e73344c94963..be14d7de7cd8 100644 --- a/drivers/media/pci/cx23885/cx23885-f300.h +++ b/drivers/media/pci/cx23885/cx23885-f300.h @@ -1,2 +1,2 @@ extern int f300_set_voltage(struct dvb_frontend *fe, - fe_sec_voltage_t voltage); + enum fe_sec_voltage voltage); diff --git a/drivers/media/pci/cx23885/cx23885.h b/drivers/media/pci/cx23885/cx23885.h index 81e25194986b..027ead438194 100644 --- a/drivers/media/pci/cx23885/cx23885.h +++ b/drivers/media/pci/cx23885/cx23885.h @@ -309,7 +309,7 @@ struct cx23885_tsport { int (*set_frontend)(struct dvb_frontend *fe); int (*fe_set_voltage)(struct dvb_frontend *fe, - fe_sec_voltage_t voltage); + enum fe_sec_voltage voltage); }; struct cx23885_kernel_ir { diff --git a/drivers/media/pci/cx88/cx88-dvb.c b/drivers/media/pci/cx88/cx88-dvb.c index 1b2ed238cdb6..9dfa5ee32a8f 100644 --- a/drivers/media/pci/cx88/cx88-dvb.c +++ b/drivers/media/pci/cx88/cx88-dvb.c @@ -449,7 +449,7 @@ static int cx24123_set_ts_param(struct dvb_frontend* fe, } static int kworld_dvbs_100_set_voltage(struct dvb_frontend* fe, - fe_sec_voltage_t voltage) + enum fe_sec_voltage 
voltage) { struct cx8802_dev *dev= fe->dvb->priv; struct cx88_core *core = dev->core; @@ -465,7 +465,7 @@ static int kworld_dvbs_100_set_voltage(struct dvb_frontend* fe, } static int geniatech_dvbs_set_voltage(struct dvb_frontend *fe, - fe_sec_voltage_t voltage) + enum fe_sec_voltage voltage) { struct cx8802_dev *dev= fe->dvb->priv; struct cx88_core *core = dev->core; @@ -481,7 +481,7 @@ static int geniatech_dvbs_set_voltage(struct dvb_frontend *fe, } static int tevii_dvbs_set_voltage(struct dvb_frontend *fe, - fe_sec_voltage_t voltage) + enum fe_sec_voltage voltage) { struct cx8802_dev *dev= fe->dvb->priv; struct cx88_core *core = dev->core; @@ -505,7 +505,7 @@ static int tevii_dvbs_set_voltage(struct dvb_frontend *fe, } static int vp1027_set_voltage(struct dvb_frontend *fe, - fe_sec_voltage_t voltage) + enum fe_sec_voltage voltage) { struct cx8802_dev *dev = fe->dvb->priv; struct cx88_core *core = dev->core; @@ -897,7 +897,7 @@ static int samsung_smt_7020_tuner_set_params(struct dvb_frontend *fe) } static int samsung_smt_7020_set_tone(struct dvb_frontend *fe, - fe_sec_tone_mode_t tone) + enum fe_sec_tone_mode tone) { struct cx8802_dev *dev = fe->dvb->priv; struct cx88_core *core = dev->core; @@ -919,7 +919,7 @@ static int samsung_smt_7020_set_tone(struct dvb_frontend *fe, } static int samsung_smt_7020_set_voltage(struct dvb_frontend *fe, - fe_sec_voltage_t voltage) + enum fe_sec_voltage voltage) { struct cx8802_dev *dev = fe->dvb->priv; struct cx88_core *core = dev->core; diff --git a/drivers/media/pci/cx88/cx88.h b/drivers/media/pci/cx88/cx88.h index e75547827c52..785fe2e0d702 100644 --- a/drivers/media/pci/cx88/cx88.h +++ b/drivers/media/pci/cx88/cx88.h @@ -375,9 +375,10 @@ struct cx88_core { /* config info -- dvb */ #if IS_ENABLED(CONFIG_VIDEO_CX88_DVB) - int (*prev_set_voltage)(struct dvb_frontend *fe, fe_sec_voltage_t voltage); + int (*prev_set_voltage)(struct dvb_frontend *fe, + enum fe_sec_voltage voltage); #endif - void (*gate_ctrl)(struct cx88_core *core, int open); + void (*gate_ctrl)(struct cx88_core *core, int open); /* state info */ struct task_struct *kthread; diff --git a/drivers/media/pci/dm1105/dm1105.c b/drivers/media/pci/dm1105/dm1105.c index ed11716731e9..88915fb87e80 100644 --- a/drivers/media/pci/dm1105/dm1105.c +++ b/drivers/media/pci/dm1105/dm1105.c @@ -591,7 +591,8 @@ static inline struct dm1105_dev *frontend_to_dm1105_dev(struct dvb_frontend *fe) return container_of(fe->dvb, struct dm1105_dev, dvb_adapter); } -static int dm1105_set_voltage(struct dvb_frontend *fe, fe_sec_voltage_t voltage) +static int dm1105_set_voltage(struct dvb_frontend *fe, + enum fe_sec_voltage voltage) { struct dm1105_dev *dev = frontend_to_dm1105_dev(fe); diff --git a/drivers/media/pci/mantis/mantis_vp1034.c b/drivers/media/pci/mantis/mantis_vp1034.c index 7c1bd167225c..3b1928594b12 100644 --- a/drivers/media/pci/mantis/mantis_vp1034.c +++ b/drivers/media/pci/mantis/mantis_vp1034.c @@ -44,7 +44,7 @@ static struct mb86a16_config vp1034_mb86a16_config = { #define MANTIS_MODEL_NAME "VP-1034" #define MANTIS_DEV_TYPE "DVB-S/DSS" -int vp1034_set_voltage(struct dvb_frontend *fe, fe_sec_voltage_t voltage) +int vp1034_set_voltage(struct dvb_frontend *fe, enum fe_sec_voltage voltage) { struct mantis_pci *mantis = fe->dvb->priv; diff --git a/drivers/media/pci/mantis/mantis_vp1034.h b/drivers/media/pci/mantis/mantis_vp1034.h index 323f38ef8e3d..764b1c66ea1b 100644 --- a/drivers/media/pci/mantis/mantis_vp1034.h +++ b/drivers/media/pci/mantis/mantis_vp1034.h @@ -28,6 +28,7 @@ #define 
MANTIS_VP_1034_DVB_S 0x0014 extern struct mantis_hwconfig vp1034_config; -extern int vp1034_set_voltage(struct dvb_frontend *fe, fe_sec_voltage_t voltage); +extern int vp1034_set_voltage(struct dvb_frontend *fe, + enum fe_sec_voltage voltage); #endif /* __MANTIS_VP1034_H */ diff --git a/drivers/media/pci/ngene/ngene.h b/drivers/media/pci/ngene/ngene.h index 51e2fbd18b1b..fa30930d7047 100644 --- a/drivers/media/pci/ngene/ngene.h +++ b/drivers/media/pci/ngene/ngene.h @@ -682,7 +682,7 @@ struct ngene_channel { int AudioDTOUpdated; u32 AudioDTOValue; - int (*set_tone)(struct dvb_frontend *, fe_sec_tone_mode_t); + int (*set_tone)(struct dvb_frontend *, enum fe_sec_tone_mode); u8 lnbh; /* stuff from analog driver */ diff --git a/drivers/media/pci/pt1/pt1.c b/drivers/media/pci/pt1/pt1.c index acc35b42e53c..e7e4428109c3 100644 --- a/drivers/media/pci/pt1/pt1.c +++ b/drivers/media/pci/pt1/pt1.c @@ -101,11 +101,11 @@ struct pt1_adapter { struct dmxdev dmxdev; struct dvb_frontend *fe; int (*orig_set_voltage)(struct dvb_frontend *fe, - fe_sec_voltage_t voltage); + enum fe_sec_voltage voltage); int (*orig_sleep)(struct dvb_frontend *fe); int (*orig_init)(struct dvb_frontend *fe); - fe_sec_voltage_t voltage; + enum fe_sec_voltage voltage; int sleep; }; @@ -575,7 +575,7 @@ pt1_update_power(struct pt1 *pt1) mutex_unlock(&pt1->lock); } -static int pt1_set_voltage(struct dvb_frontend *fe, fe_sec_voltage_t voltage) +static int pt1_set_voltage(struct dvb_frontend *fe, enum fe_sec_voltage voltage) { struct pt1_adapter *adap; diff --git a/drivers/media/pci/pt1/va1j5jf8007s.c b/drivers/media/pci/pt1/va1j5jf8007s.c index 1b637b74ef58..d0e70dc0e16f 100644 --- a/drivers/media/pci/pt1/va1j5jf8007s.c +++ b/drivers/media/pci/pt1/va1j5jf8007s.c @@ -108,7 +108,7 @@ static int va1j5jf8007s_get_frontend_algo(struct dvb_frontend *fe) } static int -va1j5jf8007s_read_status(struct dvb_frontend *fe, fe_status_t *status) +va1j5jf8007s_read_status(struct dvb_frontend *fe, enum fe_status *status) { struct va1j5jf8007s_state *state; @@ -387,7 +387,7 @@ static int va1j5jf8007s_tune(struct dvb_frontend *fe, bool re_tune, unsigned int mode_flags, unsigned int *delay, - fe_status_t *status) + enum fe_status *status) { struct va1j5jf8007s_state *state; int ret; diff --git a/drivers/media/pci/pt1/va1j5jf8007t.c b/drivers/media/pci/pt1/va1j5jf8007t.c index 2db15159d514..0268f20b8097 100644 --- a/drivers/media/pci/pt1/va1j5jf8007t.c +++ b/drivers/media/pci/pt1/va1j5jf8007t.c @@ -98,7 +98,7 @@ static int va1j5jf8007t_get_frontend_algo(struct dvb_frontend *fe) } static int -va1j5jf8007t_read_status(struct dvb_frontend *fe, fe_status_t *status) +va1j5jf8007t_read_status(struct dvb_frontend *fe, enum fe_status *status) { struct va1j5jf8007t_state *state; @@ -266,7 +266,7 @@ static int va1j5jf8007t_tune(struct dvb_frontend *fe, bool re_tune, unsigned int mode_flags, unsigned int *delay, - fe_status_t *status) + enum fe_status *status) { struct va1j5jf8007t_state *state; int ret; diff --git a/drivers/media/pci/pt3/pt3.c b/drivers/media/pci/pt3/pt3.c index 7a37e8fe2ee2..0d2e2b217121 100644 --- a/drivers/media/pci/pt3/pt3.c +++ b/drivers/media/pci/pt3/pt3.c @@ -188,7 +188,7 @@ static int pt3_set_lna(struct dvb_frontend *fe) return ret; } -static int pt3_set_voltage(struct dvb_frontend *fe, fe_sec_voltage_t volt) +static int pt3_set_voltage(struct dvb_frontend *fe, enum fe_sec_voltage volt) { struct pt3_adapter *adap; struct pt3_board *pt3; diff --git a/drivers/media/pci/saa7134/saa7134-dvb.c b/drivers/media/pci/saa7134/saa7134-dvb.c index 
d47fb22e12f2..101ba8729416 100644 --- a/drivers/media/pci/saa7134/saa7134-dvb.c +++ b/drivers/media/pci/saa7134/saa7134-dvb.c @@ -987,7 +987,8 @@ static struct tda10086_config sd1878_4m = { * special case: lnb supply is connected to the gated i2c */ -static int md8800_set_voltage(struct dvb_frontend *fe, fe_sec_voltage_t voltage) +static int md8800_set_voltage(struct dvb_frontend *fe, + enum fe_sec_voltage voltage) { int res = -EIO; struct saa7134_dev *dev = fe->dvb->priv; @@ -1013,7 +1014,8 @@ static int md8800_set_high_voltage(struct dvb_frontend *fe, long arg) return res; }; -static int md8800_set_voltage2(struct dvb_frontend *fe, fe_sec_voltage_t voltage) +static int md8800_set_voltage2(struct dvb_frontend *fe, + enum fe_sec_voltage voltage) { struct saa7134_dev *dev = fe->dvb->priv; u8 wbuf[2] = { 0x1f, 00 }; diff --git a/drivers/media/pci/saa7134/saa7134.h b/drivers/media/pci/saa7134/saa7134.h index 6fec01711680..f682ba9b34a7 100644 --- a/drivers/media/pci/saa7134/saa7134.h +++ b/drivers/media/pci/saa7134/saa7134.h @@ -656,7 +656,8 @@ struct saa7134_dev { /* SAA7134_MPEG_DVB only */ struct vb2_dvb_frontends frontends; int (*original_demod_sleep)(struct dvb_frontend *fe); - int (*original_set_voltage)(struct dvb_frontend *fe, fe_sec_voltage_t voltage); + int (*original_set_voltage)(struct dvb_frontend *fe, + enum fe_sec_voltage voltage); int (*original_set_high_voltage)(struct dvb_frontend *fe, long arg); #endif void (*gate_ctrl)(struct saa7134_dev *dev, int open); diff --git a/drivers/media/pci/ttpci/av7110.c b/drivers/media/pci/ttpci/av7110.c index 45199a12b9d9..3f24fce74fc1 100644 --- a/drivers/media/pci/ttpci/av7110.c +++ b/drivers/media/pci/ttpci/av7110.c @@ -1172,7 +1172,7 @@ static int dvb_get_stc(struct dmx_demux *demux, unsigned int num, ******************************************************************************/ -static int av7110_set_tone(struct dvb_frontend* fe, fe_sec_tone_mode_t tone) +static int av7110_set_tone(struct dvb_frontend *fe, enum fe_sec_tone_mode tone) { struct av7110* av7110 = fe->dvb->priv; @@ -1197,7 +1197,7 @@ static int av7110_diseqc_send_master_cmd(struct dvb_frontend* fe, } static int av7110_diseqc_send_burst(struct dvb_frontend* fe, - fe_sec_mini_cmd_t minicmd) + enum fe_sec_mini_cmd minicmd) { struct av7110* av7110 = fe->dvb->priv; @@ -1946,7 +1946,7 @@ static struct l64781_config grundig_29504_401_config = { -static int av7110_fe_lock_fix(struct av7110* av7110, fe_status_t status) +static int av7110_fe_lock_fix(struct av7110 *av7110, enum fe_status status) { int ret = 0; int synced = (status & FE_HAS_LOCK) ? 
1 : 0; @@ -2008,7 +2008,8 @@ static int av7110_fe_init(struct dvb_frontend* fe) return ret; } -static int av7110_fe_read_status(struct dvb_frontend* fe, fe_status_t* status) +static int av7110_fe_read_status(struct dvb_frontend *fe, + enum fe_status *status) { struct av7110* av7110 = fe->dvb->priv; @@ -2043,7 +2044,8 @@ static int av7110_fe_diseqc_send_master_cmd(struct dvb_frontend* fe, return ret; } -static int av7110_fe_diseqc_send_burst(struct dvb_frontend* fe, fe_sec_mini_cmd_t minicmd) +static int av7110_fe_diseqc_send_burst(struct dvb_frontend *fe, + enum fe_sec_mini_cmd minicmd) { struct av7110* av7110 = fe->dvb->priv; @@ -2055,7 +2057,8 @@ static int av7110_fe_diseqc_send_burst(struct dvb_frontend* fe, fe_sec_mini_cmd_ return ret; } -static int av7110_fe_set_tone(struct dvb_frontend* fe, fe_sec_tone_mode_t tone) +static int av7110_fe_set_tone(struct dvb_frontend *fe, + enum fe_sec_tone_mode tone) { struct av7110* av7110 = fe->dvb->priv; @@ -2067,7 +2070,8 @@ static int av7110_fe_set_tone(struct dvb_frontend* fe, fe_sec_tone_mode_t tone) return ret; } -static int av7110_fe_set_voltage(struct dvb_frontend* fe, fe_sec_voltage_t voltage) +static int av7110_fe_set_voltage(struct dvb_frontend *fe, + enum fe_sec_voltage voltage) { struct av7110* av7110 = fe->dvb->priv; diff --git a/drivers/media/pci/ttpci/av7110.h b/drivers/media/pci/ttpci/av7110.h index 835635b0c712..3a55927edb95 100644 --- a/drivers/media/pci/ttpci/av7110.h +++ b/drivers/media/pci/ttpci/av7110.h @@ -269,25 +269,30 @@ struct av7110 { unsigned long size_root; struct dvb_frontend* fe; - fe_status_t fe_status; + enum fe_status fe_status; struct mutex ioctl_mutex; /* crash recovery */ void (*recover)(struct av7110* av7110); - fe_sec_voltage_t saved_voltage; - fe_sec_tone_mode_t saved_tone; + enum fe_sec_voltage saved_voltage; + enum fe_sec_tone_mode saved_tone; struct dvb_diseqc_master_cmd saved_master_cmd; - fe_sec_mini_cmd_t saved_minicmd; + enum fe_sec_mini_cmd saved_minicmd; int (*fe_init)(struct dvb_frontend* fe); - int (*fe_read_status)(struct dvb_frontend* fe, fe_status_t* status); - int (*fe_diseqc_reset_overload)(struct dvb_frontend* fe); - int (*fe_diseqc_send_master_cmd)(struct dvb_frontend* fe, struct dvb_diseqc_master_cmd* cmd); - int (*fe_diseqc_send_burst)(struct dvb_frontend* fe, fe_sec_mini_cmd_t minicmd); - int (*fe_set_tone)(struct dvb_frontend* fe, fe_sec_tone_mode_t tone); - int (*fe_set_voltage)(struct dvb_frontend* fe, fe_sec_voltage_t voltage); - int (*fe_dishnetwork_send_legacy_command)(struct dvb_frontend* fe, unsigned long cmd); + int (*fe_read_status)(struct dvb_frontend *fe, enum fe_status *status); + int (*fe_diseqc_reset_overload)(struct dvb_frontend *fe); + int (*fe_diseqc_send_master_cmd)(struct dvb_frontend *fe, + struct dvb_diseqc_master_cmd *cmd); + int (*fe_diseqc_send_burst)(struct dvb_frontend *fe, + enum fe_sec_mini_cmd minicmd); + int (*fe_set_tone)(struct dvb_frontend *fe, + enum fe_sec_tone_mode tone); + int (*fe_set_voltage)(struct dvb_frontend *fe, + enum fe_sec_voltage voltage); + int (*fe_dishnetwork_send_legacy_command)(struct dvb_frontend *fe, + unsigned long cmd); int (*fe_set_frontend)(struct dvb_frontend *fe); }; diff --git a/drivers/media/pci/ttpci/budget-core.c b/drivers/media/pci/ttpci/budget-core.c index 23e05499b509..e9674b40007c 100644 --- a/drivers/media/pci/ttpci/budget-core.c +++ b/drivers/media/pci/ttpci/budget-core.c @@ -161,7 +161,8 @@ static int start_ts_capture(struct budget *budget) return 0; } -static int budget_read_fe_status(struct dvb_frontend *fe, 
fe_status_t *status) +static int budget_read_fe_status(struct dvb_frontend *fe, + enum fe_status *status) { struct budget *budget = (struct budget *) fe->dvb->priv; int synced; diff --git a/drivers/media/pci/ttpci/budget-patch.c b/drivers/media/pci/ttpci/budget-patch.c index a4d8867e1d7b..b5b65962ce8f 100644 --- a/drivers/media/pci/ttpci/budget-patch.c +++ b/drivers/media/pci/ttpci/budget-patch.c @@ -128,9 +128,9 @@ static int SendDiSEqCMsg (struct budget *budget, int len, u8 *msg, unsigned long return 0; } -/* shamelessly copy/pasted from budget.c -*/ -static int budget_set_tone(struct dvb_frontend* fe, fe_sec_tone_mode_t tone) +/* shamelessly copy/pasted from budget.c */ +static int budget_set_tone(struct dvb_frontend *fe, + enum fe_sec_tone_mode tone) { struct budget* budget = (struct budget*) fe->dvb->priv; @@ -159,7 +159,8 @@ static int budget_diseqc_send_master_cmd(struct dvb_frontend* fe, struct dvb_dis return 0; } -static int budget_diseqc_send_burst(struct dvb_frontend* fe, fe_sec_mini_cmd_t minicmd) +static int budget_diseqc_send_burst(struct dvb_frontend *fe, + enum fe_sec_mini_cmd minicmd) { struct budget* budget = (struct budget*) fe->dvb->priv; @@ -223,7 +224,8 @@ static int av7110_send_diseqc_msg(struct budget_patch *budget, int len, u8 *msg, return 0; } -static int budget_patch_set_tone(struct dvb_frontend* fe, fe_sec_tone_mode_t tone) +static int budget_patch_set_tone(struct dvb_frontend *fe, + enum fe_sec_tone_mode tone) { struct budget_patch* budget = (struct budget_patch*) fe->dvb->priv; @@ -252,7 +254,8 @@ static int budget_patch_diseqc_send_master_cmd(struct dvb_frontend* fe, struct d return 0; } -static int budget_patch_diseqc_send_burst(struct dvb_frontend* fe, fe_sec_mini_cmd_t minicmd) +static int budget_patch_diseqc_send_burst(struct dvb_frontend *fe, + enum fe_sec_mini_cmd minicmd) { struct budget_patch* budget = (struct budget_patch*) fe->dvb->priv; diff --git a/drivers/media/pci/ttpci/budget.c b/drivers/media/pci/ttpci/budget.c index 6ccc48833fd8..99972beca262 100644 --- a/drivers/media/pci/ttpci/budget.c +++ b/drivers/media/pci/ttpci/budget.c @@ -132,7 +132,8 @@ static int SendDiSEqCMsg (struct budget *budget, int len, u8 *msg, unsigned long * Voltage must be set here. 
* GPIO 1: LNBP EN, GPIO 2: LNBP VSEL */ -static int SetVoltage_Activy (struct budget *budget, fe_sec_voltage_t voltage) +static int SetVoltage_Activy(struct budget *budget, + enum fe_sec_voltage voltage) { struct saa7146_dev *dev=budget->dev; @@ -157,14 +158,16 @@ static int SetVoltage_Activy (struct budget *budget, fe_sec_voltage_t voltage) return 0; } -static int siemens_budget_set_voltage(struct dvb_frontend* fe, fe_sec_voltage_t voltage) +static int siemens_budget_set_voltage(struct dvb_frontend *fe, + enum fe_sec_voltage voltage) { struct budget* budget = (struct budget*) fe->dvb->priv; return SetVoltage_Activy (budget, voltage); } -static int budget_set_tone(struct dvb_frontend* fe, fe_sec_tone_mode_t tone) +static int budget_set_tone(struct dvb_frontend *fe, + enum fe_sec_tone_mode tone) { struct budget* budget = (struct budget*) fe->dvb->priv; @@ -193,7 +196,8 @@ static int budget_diseqc_send_master_cmd(struct dvb_frontend* fe, struct dvb_dis return 0; } -static int budget_diseqc_send_burst(struct dvb_frontend* fe, fe_sec_mini_cmd_t minicmd) +static int budget_diseqc_send_burst(struct dvb_frontend *fe, + enum fe_sec_mini_cmd minicmd) { struct budget* budget = (struct budget*) fe->dvb->priv; diff --git a/drivers/media/pci/ttpci/budget.h b/drivers/media/pci/ttpci/budget.h index 3d8a806c20bb..1ccbe1a49a4b 100644 --- a/drivers/media/pci/ttpci/budget.h +++ b/drivers/media/pci/ttpci/budget.h @@ -72,7 +72,7 @@ struct budget { struct dvb_adapter dvb_adapter; struct dvb_frontend *dvb_frontend; - int (*read_fe_status)(struct dvb_frontend *fe, fe_status_t *status); + int (*read_fe_status)(struct dvb_frontend *fe, enum fe_status *status); int fe_synced; void *priv; diff --git a/drivers/media/usb/dvb-usb-v2/af9015.c b/drivers/media/usb/dvb-usb-v2/af9015.c index 16c0b7d4f8e7..95a7388e89d4 100644 --- a/drivers/media/usb/dvb-usb-v2/af9015.c +++ b/drivers/media/usb/dvb-usb-v2/af9015.c @@ -641,7 +641,7 @@ static int af9015_af9013_set_frontend(struct dvb_frontend *fe) /* override demod callbacks for resource locking */ static int af9015_af9013_read_status(struct dvb_frontend *fe, - fe_status_t *status) + enum fe_status *status) { int ret; struct af9015_state *state = fe_to_priv(fe); diff --git a/drivers/media/usb/dvb-usb-v2/af9015.h b/drivers/media/usb/dvb-usb-v2/af9015.h index 3a6f3ad1eadb..1db1bb0d57bc 100644 --- a/drivers/media/usb/dvb-usb-v2/af9015.h +++ b/drivers/media/usb/dvb-usb-v2/af9015.h @@ -133,7 +133,7 @@ struct af9015_state { /* for demod callback override */ int (*set_frontend[2]) (struct dvb_frontend *fe); - int (*read_status[2]) (struct dvb_frontend *fe, fe_status_t *status); + int (*read_status[2]) (struct dvb_frontend *fe, enum fe_status *status); int (*init[2]) (struct dvb_frontend *fe); int (*sleep[2]) (struct dvb_frontend *fe); int (*tuner_init[2]) (struct dvb_frontend *fe); diff --git a/drivers/media/usb/dvb-usb-v2/dvbsky.c b/drivers/media/usb/dvb-usb-v2/dvbsky.c index 57c8c2db9f2d..5cc01bbdede9 100644 --- a/drivers/media/usb/dvb-usb-v2/dvbsky.c +++ b/drivers/media/usb/dvb-usb-v2/dvbsky.c @@ -45,9 +45,9 @@ struct dvbsky_state { /* fe hook functions*/ int (*fe_set_voltage)(struct dvb_frontend *fe, - fe_sec_voltage_t voltage); + enum fe_sec_voltage voltage); int (*fe_read_status)(struct dvb_frontend *fe, - fe_status_t *status); + enum fe_status *status); }; static int dvbsky_usb_generic_rw(struct dvb_usb_device *d, @@ -237,7 +237,7 @@ static int dvbsky_get_rc_config(struct dvb_usb_device *d, struct dvb_usb_rc *rc) #endif static int dvbsky_usb_set_voltage(struct dvb_frontend *fe, - 
fe_sec_voltage_t voltage) + enum fe_sec_voltage voltage) { struct dvb_usb_device *d = fe_to_d(fe); struct dvbsky_state *state = d_to_priv(d); @@ -277,7 +277,8 @@ static int dvbsky_read_mac_addr(struct dvb_usb_adapter *adap, u8 mac[6]) return 0; } -static int dvbsky_usb_read_status(struct dvb_frontend *fe, fe_status_t *status) +static int dvbsky_usb_read_status(struct dvb_frontend *fe, + enum fe_status *status) { struct dvb_usb_device *d = fe_to_d(fe); struct dvbsky_state *state = d_to_priv(d); @@ -368,7 +369,7 @@ fail_attach: } static int dvbsky_usb_ci_set_voltage(struct dvb_frontend *fe, - fe_sec_voltage_t voltage) + enum fe_sec_voltage voltage) { struct dvb_usb_device *d = fe_to_d(fe); struct dvbsky_state *state = d_to_priv(d); diff --git a/drivers/media/usb/dvb-usb-v2/lmedm04.c b/drivers/media/usb/dvb-usb-v2/lmedm04.c index 5de6f7c04d09..1b6ca42ad116 100644 --- a/drivers/media/usb/dvb-usb-v2/lmedm04.c +++ b/drivers/media/usb/dvb-usb-v2/lmedm04.c @@ -126,7 +126,7 @@ DVB_DEFINE_MOD_OPT_ADAPTER_NR(adapter_nr); struct lme2510_state { unsigned long int_urb_due; - fe_status_t lock_status; + enum fe_status lock_status; u8 id; u8 tuner_config; u8 signal_level; @@ -144,12 +144,12 @@ struct lme2510_state { struct urb *lme_urb; void *usb_buffer; /* Frontend original calls */ - int (*fe_read_status)(struct dvb_frontend *, fe_status_t *); + int (*fe_read_status)(struct dvb_frontend *, enum fe_status *); int (*fe_read_signal_strength)(struct dvb_frontend *, u16 *); int (*fe_read_snr)(struct dvb_frontend *, u16 *); int (*fe_read_ber)(struct dvb_frontend *, u32 *); int (*fe_read_ucblocks)(struct dvb_frontend *, u32 *); - int (*fe_set_voltage)(struct dvb_frontend *, fe_sec_voltage_t); + int (*fe_set_voltage)(struct dvb_frontend *, enum fe_sec_voltage); u8 dvb_usb_lme2510_firmware; }; @@ -802,7 +802,7 @@ static struct ts2020_config ts2020_config = { }; static int dm04_lme2510_set_voltage(struct dvb_frontend *fe, - fe_sec_voltage_t voltage) + enum fe_sec_voltage voltage) { struct dvb_usb_device *d = fe_to_d(fe); struct lme2510_state *st = fe_to_priv(fe); @@ -837,7 +837,7 @@ static int dm04_lme2510_set_voltage(struct dvb_frontend *fe, return (ret < 0) ? 
-ENODEV : 0; } -static int dm04_read_status(struct dvb_frontend *fe, fe_status_t *status) +static int dm04_read_status(struct dvb_frontend *fe, enum fe_status *status) { struct dvb_usb_device *d = fe_to_d(fe); struct lme2510_state *st = d->priv; diff --git a/drivers/media/usb/dvb-usb-v2/mxl111sf-demod.c b/drivers/media/usb/dvb-usb-v2/mxl111sf-demod.c index ecefa5c477fa..ea3753653368 100644 --- a/drivers/media/usb/dvb-usb-v2/mxl111sf-demod.c +++ b/drivers/media/usb/dvb-usb-v2/mxl111sf-demod.c @@ -72,7 +72,7 @@ int mxl111sf_demod_program_regs(struct mxl111sf_demod_state *state, static int mxl1x1sf_demod_get_tps_code_rate(struct mxl111sf_demod_state *state, - fe_code_rate_t *code_rate) + enum fe_code_rate *code_rate) { u8 val; int ret = mxl111sf_demod_read_reg(state, V6_CODE_RATE_TPS_REG, &val); @@ -103,7 +103,7 @@ fail: static int mxl1x1sf_demod_get_tps_modulation(struct mxl111sf_demod_state *state, - fe_modulation_t *modulation) + enum fe_modulation *modulation) { u8 val; int ret = mxl111sf_demod_read_reg(state, V6_MODORDER_TPS_REG, &val); @@ -128,7 +128,7 @@ fail: static int mxl1x1sf_demod_get_tps_guard_fft_mode(struct mxl111sf_demod_state *state, - fe_transmit_mode_t *fft_mode) + enum fe_transmit_mode *fft_mode) { u8 val; int ret = mxl111sf_demod_read_reg(state, V6_MODE_TPS_REG, &val); @@ -153,7 +153,7 @@ fail: static int mxl1x1sf_demod_get_tps_guard_interval(struct mxl111sf_demod_state *state, - fe_guard_interval_t *guard) + enum fe_guard_interval *guard) { u8 val; int ret = mxl111sf_demod_read_reg(state, V6_CP_TPS_REG, &val); @@ -181,7 +181,7 @@ fail: static int mxl1x1sf_demod_get_tps_hierarchy(struct mxl111sf_demod_state *state, - fe_hierarchy_t *hierarchy) + enum fe_hierarchy *hierarchy) { u8 val; int ret = mxl111sf_demod_read_reg(state, V6_TPS_HIERACHY_REG, &val); @@ -441,7 +441,7 @@ fail: } static int mxl111sf_demod_read_status(struct dvb_frontend *fe, - fe_status_t *status) + enum fe_status *status) { struct mxl111sf_demod_state *state = fe->demodulator_priv; int ret, locked, cr_lock, sync_lock, fec_lock; @@ -480,7 +480,7 @@ static int mxl111sf_demod_read_signal_strength(struct dvb_frontend *fe, u16 *signal_strength) { struct mxl111sf_demod_state *state = fe->demodulator_priv; - fe_modulation_t modulation; + enum fe_modulation modulation; u16 snr; mxl111sf_demod_calc_snr(state, &snr); diff --git a/drivers/media/usb/dvb-usb/af9005-fe.c b/drivers/media/usb/dvb-usb/af9005-fe.c index 6e84a546dfdc..ac97075d75f7 100644 --- a/drivers/media/usb/dvb-usb/af9005-fe.c +++ b/drivers/media/usb/dvb-usb/af9005-fe.c @@ -29,7 +29,7 @@ struct af9005_fe_state { struct dvb_usb_device *d; - fe_status_t stat; + enum fe_status stat; /* retraining parameters */ u32 original_fcw; @@ -437,7 +437,8 @@ static int af9005_fe_refresh_state(struct dvb_frontend *fe) return 0; } -static int af9005_fe_read_status(struct dvb_frontend *fe, fe_status_t * stat) +static int af9005_fe_read_status(struct dvb_frontend *fe, + enum fe_status *stat) { struct af9005_fe_state *state = fe->demodulator_priv; u8 temp; diff --git a/drivers/media/usb/dvb-usb/az6027.c b/drivers/media/usb/dvb-usb/az6027.c index 0df52ab32a7b..92e47d6c3ee3 100644 --- a/drivers/media/usb/dvb-usb/az6027.c +++ b/drivers/media/usb/dvb-usb/az6027.c @@ -778,7 +778,8 @@ static int az6027_read_mac_addr(struct dvb_usb_device *d, u8 mac[6]) } */ -static int az6027_set_voltage(struct dvb_frontend *fe, fe_sec_voltage_t voltage) +static int az6027_set_voltage(struct dvb_frontend *fe, + enum fe_sec_voltage voltage) { u8 buf; diff --git 
a/drivers/media/usb/dvb-usb/cinergyT2-fe.c b/drivers/media/usb/dvb-usb/cinergyT2-fe.c index c890fe46acd3..b3ec743a7a2e 100644 --- a/drivers/media/usb/dvb-usb/cinergyT2-fe.c +++ b/drivers/media/usb/dvb-usb/cinergyT2-fe.c @@ -142,7 +142,7 @@ struct cinergyt2_fe_state { }; static int cinergyt2_fe_read_status(struct dvb_frontend *fe, - fe_status_t *status) + enum fe_status *status) { struct cinergyt2_fe_state *state = fe->demodulator_priv; struct dvbt_get_status_msg result; diff --git a/drivers/media/usb/dvb-usb/dib0700.h b/drivers/media/usb/dvb-usb/dib0700.h index 927617d95616..8fd8f5b489d2 100644 --- a/drivers/media/usb/dvb-usb/dib0700.h +++ b/drivers/media/usb/dvb-usb/dib0700.h @@ -48,7 +48,7 @@ struct dib0700_state { u8 disable_streaming_master_mode; u32 fw_version; u32 nb_packet_buffer_size; - int (*read_status)(struct dvb_frontend *, fe_status_t *); + int (*read_status)(struct dvb_frontend *, enum fe_status *); int (*sleep)(struct dvb_frontend* fe); u8 buf[255]; }; diff --git a/drivers/media/usb/dvb-usb/dib0700_devices.c b/drivers/media/usb/dvb-usb/dib0700_devices.c index 5a3dbb8c7658..7ed49646a699 100644 --- a/drivers/media/usb/dvb-usb/dib0700_devices.c +++ b/drivers/media/usb/dvb-usb/dib0700_devices.c @@ -3309,7 +3309,7 @@ static int stk7070pd_frontend_attach1(struct dvb_usb_adapter *adap) } static int novatd_read_status_override(struct dvb_frontend *fe, - fe_status_t *stat) + enum fe_status *stat) { struct dvb_usb_adapter *adap = fe->dvb->priv; struct dvb_usb_device *dev = adap->dev; diff --git a/drivers/media/usb/dvb-usb/dtt200u-fe.c b/drivers/media/usb/dvb-usb/dtt200u-fe.c index 3d81daa49172..8637ad1be6be 100644 --- a/drivers/media/usb/dvb-usb/dtt200u-fe.c +++ b/drivers/media/usb/dvb-usb/dtt200u-fe.c @@ -14,13 +14,14 @@ struct dtt200u_fe_state { struct dvb_usb_device *d; - fe_status_t stat; + enum fe_status stat; struct dtv_frontend_properties fep; struct dvb_frontend frontend; }; -static int dtt200u_fe_read_status(struct dvb_frontend* fe, fe_status_t *stat) +static int dtt200u_fe_read_status(struct dvb_frontend *fe, + enum fe_status *stat) { struct dtt200u_fe_state *state = fe->demodulator_priv; u8 st = GET_TUNE_STATUS, b[3]; @@ -105,7 +106,7 @@ static int dtt200u_fe_set_frontend(struct dvb_frontend *fe) struct dtv_frontend_properties *fep = &fe->dtv_property_cache; struct dtt200u_fe_state *state = fe->demodulator_priv; int i; - fe_status_t st; + enum fe_status st; u16 freq = fep->frequency / 250000; u8 bwbuf[2] = { SET_BANDWIDTH, 0 },freqbuf[3] = { SET_RF_FREQ, 0, 0 }; diff --git a/drivers/media/usb/dvb-usb/dw2102.c b/drivers/media/usb/dvb-usb/dw2102.c index c2632bc9e530..14ef25dc6cd3 100644 --- a/drivers/media/usb/dvb-usb/dw2102.c +++ b/drivers/media/usb/dvb-usb/dw2102.c @@ -121,9 +121,9 @@ struct dw2102_state { struct i2c_client *i2c_client_tuner; /* fe hook functions*/ - int (*old_set_voltage)(struct dvb_frontend *f, fe_sec_voltage_t v); + int (*old_set_voltage)(struct dvb_frontend *f, enum fe_sec_voltage v); int (*fe_read_status)(struct dvb_frontend *fe, - fe_status_t *status); + enum fe_status *status); }; /* debug */ @@ -949,7 +949,8 @@ static int su3000_identify_state(struct usb_device *udev, return 0; } -static int dw210x_set_voltage(struct dvb_frontend *fe, fe_sec_voltage_t voltage) +static int dw210x_set_voltage(struct dvb_frontend *fe, + enum fe_sec_voltage voltage) { static u8 command_13v[] = {0x00, 0x01}; static u8 command_18v[] = {0x01, 0x01}; @@ -973,7 +974,8 @@ static int dw210x_set_voltage(struct dvb_frontend *fe, fe_sec_voltage_t voltage) return 0; } -static int 
s660_set_voltage(struct dvb_frontend *fe, fe_sec_voltage_t voltage) +static int s660_set_voltage(struct dvb_frontend *fe, + enum fe_sec_voltage voltage) { struct dvb_usb_adapter *d = (struct dvb_usb_adapter *)(fe->dvb->priv); @@ -1004,7 +1006,8 @@ static void dw210x_led_ctrl(struct dvb_frontend *fe, int offon) i2c_transfer(&udev_adap->dev->i2c_adap, &msg, 1); } -static int tt_s2_4600_read_status(struct dvb_frontend *fe, fe_status_t *status) +static int tt_s2_4600_read_status(struct dvb_frontend *fe, + enum fe_status *status) { struct dvb_usb_adapter *d = (struct dvb_usb_adapter *)(fe->dvb->priv); diff --git a/drivers/media/usb/dvb-usb/friio-fe.c b/drivers/media/usb/dvb-usb/friio-fe.c index d56f927fc31a..8ec92fbeabad 100644 --- a/drivers/media/usb/dvb-usb/friio-fe.c +++ b/drivers/media/usb/dvb-usb/friio-fe.c @@ -210,7 +210,8 @@ error: return -EREMOTEIO; } -static int jdvbt90502_read_status(struct dvb_frontend *fe, fe_status_t *state) +static int jdvbt90502_read_status(struct dvb_frontend *fe, + enum fe_status *state) { u8 result; int ret; diff --git a/drivers/media/usb/dvb-usb/gp8psk-fe.c b/drivers/media/usb/dvb-usb/gp8psk-fe.c index 67957dd99ede..db6eb79cde07 100644 --- a/drivers/media/usb/dvb-usb/gp8psk-fe.c +++ b/drivers/media/usb/dvb-usb/gp8psk-fe.c @@ -51,7 +51,8 @@ static int gp8psk_fe_update_status(struct gp8psk_fe_state *st) return 0; } -static int gp8psk_fe_read_status(struct dvb_frontend* fe, fe_status_t *status) +static int gp8psk_fe_read_status(struct dvb_frontend *fe, + enum fe_status *status) { struct gp8psk_fe_state *st = fe->demodulator_priv; gp8psk_fe_update_status(st); @@ -236,8 +237,8 @@ static int gp8psk_fe_send_diseqc_msg (struct dvb_frontend* fe, return 0; } -static int gp8psk_fe_send_diseqc_burst (struct dvb_frontend* fe, - fe_sec_mini_cmd_t burst) +static int gp8psk_fe_send_diseqc_burst(struct dvb_frontend *fe, + enum fe_sec_mini_cmd burst) { struct gp8psk_fe_state *st = fe->demodulator_priv; u8 cmd; @@ -254,7 +255,8 @@ static int gp8psk_fe_send_diseqc_burst (struct dvb_frontend* fe, return 0; } -static int gp8psk_fe_set_tone (struct dvb_frontend* fe, fe_sec_tone_mode_t tone) +static int gp8psk_fe_set_tone(struct dvb_frontend *fe, + enum fe_sec_tone_mode tone) { struct gp8psk_fe_state* state = fe->demodulator_priv; @@ -265,7 +267,8 @@ static int gp8psk_fe_set_tone (struct dvb_frontend* fe, fe_sec_tone_mode_t tone) return 0; } -static int gp8psk_fe_set_voltage (struct dvb_frontend* fe, fe_sec_voltage_t voltage) +static int gp8psk_fe_set_voltage(struct dvb_frontend *fe, + enum fe_sec_voltage voltage) { struct gp8psk_fe_state* state = fe->demodulator_priv; diff --git a/drivers/media/usb/dvb-usb/opera1.c b/drivers/media/usb/dvb-usb/opera1.c index 14a2119912ba..2566d2f1c2ad 100644 --- a/drivers/media/usb/dvb-usb/opera1.c +++ b/drivers/media/usb/dvb-usb/opera1.c @@ -167,7 +167,8 @@ static struct i2c_algorithm opera1_i2c_algo = { .functionality = opera1_i2c_func, }; -static int opera1_set_voltage(struct dvb_frontend *fe, fe_sec_voltage_t voltage) +static int opera1_set_voltage(struct dvb_frontend *fe, + enum fe_sec_voltage voltage) { static u8 command_13v[1]={0x00}; static u8 command_18v[1]={0x01}; diff --git a/drivers/media/usb/dvb-usb/technisat-usb2.c b/drivers/media/usb/dvb-usb/technisat-usb2.c index 5801ae7f672a..03f334d3a8f4 100644 --- a/drivers/media/usb/dvb-usb/technisat-usb2.c +++ b/drivers/media/usb/dvb-usb/technisat-usb2.c @@ -453,7 +453,7 @@ static struct stv090x_config technisat_usb2_stv090x_config; /* frontend attach */ static int 
technisat_usb2_set_voltage(struct dvb_frontend *fe, - fe_sec_voltage_t voltage) + enum fe_sec_voltage voltage) { int i; u8 gpio[3] = { 0 }; /* 0 = 2, 1 = 3, 2 = 4 */ diff --git a/drivers/media/usb/dvb-usb/vp702x-fe.c b/drivers/media/usb/dvb-usb/vp702x-fe.c index 5eab468dd904..d361a72ca0fa 100644 --- a/drivers/media/usb/dvb-usb/vp702x-fe.c +++ b/drivers/media/usb/dvb-usb/vp702x-fe.c @@ -26,8 +26,8 @@ struct vp702x_fe_state { struct dvb_frontend_ops ops; - fe_sec_voltage_t voltage; - fe_sec_tone_mode_t tone_mode; + enum fe_sec_voltage voltage; + enum fe_sec_tone_mode tone_mode; u8 lnb_buf[8]; @@ -72,7 +72,8 @@ static u8 vp702x_chksum(u8 *buf,int f, int count) return ~s+1; } -static int vp702x_fe_read_status(struct dvb_frontend* fe, fe_status_t *status) +static int vp702x_fe_read_status(struct dvb_frontend *fe, + enum fe_status *status) { struct vp702x_fe_state *st = fe->demodulator_priv; vp702x_fe_refresh_state(st); @@ -243,13 +244,15 @@ static int vp702x_fe_send_diseqc_msg (struct dvb_frontend* fe, return 0; } -static int vp702x_fe_send_diseqc_burst (struct dvb_frontend* fe, fe_sec_mini_cmd_t burst) +static int vp702x_fe_send_diseqc_burst(struct dvb_frontend *fe, + enum fe_sec_mini_cmd burst) { deb_fe("%s\n",__func__); return 0; } -static int vp702x_fe_set_tone(struct dvb_frontend* fe, fe_sec_tone_mode_t tone) +static int vp702x_fe_set_tone(struct dvb_frontend *fe, + enum fe_sec_tone_mode tone) { struct vp702x_fe_state *st = fe->demodulator_priv; struct vp702x_device_state *dst = st->d->priv; @@ -282,8 +285,8 @@ static int vp702x_fe_set_tone(struct dvb_frontend* fe, fe_sec_tone_mode_t tone) return 0; } -static int vp702x_fe_set_voltage (struct dvb_frontend* fe, fe_sec_voltage_t - voltage) +static int vp702x_fe_set_voltage(struct dvb_frontend *fe, + enum fe_sec_voltage voltage) { struct vp702x_fe_state *st = fe->demodulator_priv; struct vp702x_device_state *dst = st->d->priv; diff --git a/drivers/media/usb/dvb-usb/vp7045-fe.c b/drivers/media/usb/dvb-usb/vp7045-fe.c index b8825b18c003..e708afc6a57f 100644 --- a/drivers/media/usb/dvb-usb/vp7045-fe.c +++ b/drivers/media/usb/dvb-usb/vp7045-fe.c @@ -26,7 +26,8 @@ struct vp7045_fe_state { struct dvb_usb_device *d; }; -static int vp7045_fe_read_status(struct dvb_frontend* fe, fe_status_t *status) +static int vp7045_fe_read_status(struct dvb_frontend *fe, + enum fe_status *status) { struct vp7045_fe_state *state = fe->demodulator_priv; u8 s0 = vp7045_read_reg(state->d,0x00), diff --git a/drivers/media/usb/ttusb-budget/dvb-ttusb-budget.c b/drivers/media/usb/ttusb-budget/dvb-ttusb-budget.c index cef7a00099ea..d52d4a8d39ad 100644 --- a/drivers/media/usb/ttusb-budget/dvb-ttusb-budget.c +++ b/drivers/media/usb/ttusb-budget/dvb-ttusb-budget.c @@ -111,8 +111,8 @@ struct ttusb { int last_filter; u8 c; /* transaction counter, wraps around... 
*/ - fe_sec_tone_mode_t tone; - fe_sec_voltage_t voltage; + enum fe_sec_tone_mode tone; + enum fe_sec_voltage voltage; int mux_state; // 0..2 - MuxSyncWord, 3 - nMuxPacks, 4 - muxpack u8 mux_npacks; @@ -511,7 +511,8 @@ static int ttusb_update_lnb(struct ttusb *ttusb) return err; } -static int ttusb_set_voltage(struct dvb_frontend* fe, fe_sec_voltage_t voltage) +static int ttusb_set_voltage(struct dvb_frontend *fe, + enum fe_sec_voltage voltage) { struct ttusb* ttusb = (struct ttusb*) fe->dvb->priv; @@ -520,7 +521,7 @@ static int ttusb_set_voltage(struct dvb_frontend* fe, fe_sec_voltage_t voltage) } #ifdef TTUSB_TONE -static int ttusb_set_tone(struct dvb_frontend* fe, fe_sec_tone_mode_t tone) +static int ttusb_set_tone(struct dvb_frontend *fe, enum fe_sec_tone_mode tone) { struct ttusb* ttusb = (struct ttusb*) fe->dvb->priv; diff --git a/drivers/media/usb/ttusb-dec/ttusbdecfe.c b/drivers/media/usb/ttusb-dec/ttusbdecfe.c index 9c29552aedec..8781335ab92f 100644 --- a/drivers/media/usb/ttusb-dec/ttusbdecfe.c +++ b/drivers/media/usb/ttusb-dec/ttusbdecfe.c @@ -39,7 +39,7 @@ struct ttusbdecfe_state { static int ttusbdecfe_dvbs_read_status(struct dvb_frontend *fe, - fe_status_t *status) + enum fe_status *status) { *status = FE_HAS_SIGNAL | FE_HAS_VITERBI | FE_HAS_SYNC | FE_HAS_CARRIER | FE_HAS_LOCK; @@ -48,7 +48,7 @@ static int ttusbdecfe_dvbs_read_status(struct dvb_frontend *fe, static int ttusbdecfe_dvbt_read_status(struct dvb_frontend *fe, - fe_status_t *status) + enum fe_status *status) { struct ttusbdecfe_state* state = fe->demodulator_priv; u8 b[] = { 0x00, 0x00, 0x00, 0x00, @@ -169,7 +169,8 @@ static int ttusbdecfe_dvbs_diseqc_send_master_cmd(struct dvb_frontend* fe, struc } -static int ttusbdecfe_dvbs_set_tone(struct dvb_frontend* fe, fe_sec_tone_mode_t tone) +static int ttusbdecfe_dvbs_set_tone(struct dvb_frontend *fe, + enum fe_sec_tone_mode tone) { struct ttusbdecfe_state* state = (struct ttusbdecfe_state*) fe->demodulator_priv; @@ -179,7 +180,8 @@ static int ttusbdecfe_dvbs_set_tone(struct dvb_frontend* fe, fe_sec_tone_mode_t } -static int ttusbdecfe_dvbs_set_voltage(struct dvb_frontend* fe, fe_sec_voltage_t voltage) +static int ttusbdecfe_dvbs_set_voltage(struct dvb_frontend *fe, + enum fe_sec_voltage voltage) { struct ttusbdecfe_state* state = (struct ttusbdecfe_state*) fe->demodulator_priv; diff --git a/drivers/staging/media/mn88472/mn88472.c b/drivers/staging/media/mn88472/mn88472.c index 6863c431c648..a8d45f44765c 100644 --- a/drivers/staging/media/mn88472/mn88472.c +++ b/drivers/staging/media/mn88472/mn88472.c @@ -218,7 +218,7 @@ err: return ret; } -static int mn88472_read_status(struct dvb_frontend *fe, fe_status_t *status) +static int mn88472_read_status(struct dvb_frontend *fe, enum fe_status *status) { struct i2c_client *client = fe->demodulator_priv; struct mn88472_dev *dev = i2c_get_clientdata(client); diff --git a/drivers/staging/media/mn88472/mn88472_priv.h b/drivers/staging/media/mn88472/mn88472_priv.h index 9ba8c8b3823e..1a0de9e46b66 100644 --- a/drivers/staging/media/mn88472/mn88472_priv.h +++ b/drivers/staging/media/mn88472/mn88472_priv.h @@ -29,7 +29,7 @@ struct mn88472_dev { struct regmap *regmap[3]; struct dvb_frontend fe; u16 i2c_wr_max; - fe_delivery_system_t delivery_system; + enum fe_delivery_system delivery_system; bool warm; /* FW running */ u32 xtal; int ts_mode; diff --git a/drivers/staging/media/mn88473/mn88473.c b/drivers/staging/media/mn88473/mn88473.c index 8b6736c70057..f9146a146d07 100644 --- a/drivers/staging/media/mn88473/mn88473.c +++ 
b/drivers/staging/media/mn88473/mn88473.c @@ -167,7 +167,7 @@ err: return ret; } -static int mn88473_read_status(struct dvb_frontend *fe, fe_status_t *status) +static int mn88473_read_status(struct dvb_frontend *fe, enum fe_status *status) { struct i2c_client *client = fe->demodulator_priv; struct mn88473_dev *dev = i2c_get_clientdata(client); diff --git a/drivers/staging/media/mn88473/mn88473_priv.h b/drivers/staging/media/mn88473/mn88473_priv.h index ef6f01323ac9..54beb4241ccf 100644 --- a/drivers/staging/media/mn88473/mn88473_priv.h +++ b/drivers/staging/media/mn88473/mn88473_priv.h @@ -29,7 +29,7 @@ struct mn88473_dev { struct regmap *regmap[3]; struct dvb_frontend fe; u16 i2c_wr_max; - fe_delivery_system_t delivery_system; + enum fe_delivery_system delivery_system; bool warm; /* FW running */ u32 xtal; }; diff --git a/include/uapi/linux/dvb/frontend.h b/include/uapi/linux/dvb/frontend.h index a36d802fae0c..7f829c92dd64 100644 --- a/include/uapi/linux/dvb/frontend.h +++ b/include/uapi/linux/dvb/frontend.h @@ -77,7 +77,7 @@ typedef enum fe_caps fe_caps_t; struct dvb_frontend_info { char name[128]; - fe_type_t type; /* DEPRECATED. Use DTV_ENUM_DELSYS instead */ + enum fe_type type; /* DEPRECATED. Use DTV_ENUM_DELSYS instead */ __u32 frequency_min; __u32 frequency_max; __u32 frequency_stepsize; @@ -86,7 +86,7 @@ struct dvb_frontend_info { __u32 symbol_rate_max; __u32 symbol_rate_tolerance; /* ppm */ __u32 notifier_delay; /* DEPRECATED */ - fe_caps_t caps; + enum fe_caps caps; }; -- cgit v1.2.3 From af6392dec9421ad9de973d1f0558813f52537eba Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Sun, 7 Jun 2015 15:00:17 -0300 Subject: [media] frontend: Move legacy API enums/structs to the end In order to better organize the header file, move the legacy API (DVBv3) support to the end, just before the ioctl definitions. This way, we can use just one #if for all of them. 
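(For orientation, the net effect is one compatibility block at the bottom of the header, summarised below; this sketch is not the literal hunk:)

#if defined(__DVB_CORE__) || !defined(__KERNEL__)
/* DVBv3 legacy definitions: enum fe_bandwidth, struct dvb_qpsk_parameters,
 * struct dvb_qam_parameters, struct dvb_vsb_parameters,
 * struct dvb_ofdm_parameters, struct dvb_frontend_parameters,
 * struct dvb_frontend_event */
#endif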
Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/dvb/frontend.h | 116 +++++++++++++++++++------------------- 1 file changed, 58 insertions(+), 58 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/dvb/frontend.h b/include/uapi/linux/dvb/frontend.h index 7f829c92dd64..75605a7670a9 100644 --- a/include/uapi/linux/dvb/frontend.h +++ b/include/uapi/linux/dvb/frontend.h @@ -216,19 +216,6 @@ enum fe_transmit_mode { typedef enum fe_transmit_mode fe_transmit_mode_t; -#if defined(__DVB_CORE__) || !defined (__KERNEL__) -enum fe_bandwidth { - BANDWIDTH_8_MHZ, - BANDWIDTH_7_MHZ, - BANDWIDTH_6_MHZ, - BANDWIDTH_AUTO, - BANDWIDTH_5_MHZ, - BANDWIDTH_10_MHZ, - BANDWIDTH_1_712_MHZ, -}; - -typedef enum fe_bandwidth fe_bandwidth_t; -#endif enum fe_guard_interval { GUARD_INTERVAL_1_32, @@ -263,51 +250,6 @@ enum fe_interleaving { INTERLEAVING_720, }; -#if defined(__DVB_CORE__) || !defined (__KERNEL__) -struct dvb_qpsk_parameters { - __u32 symbol_rate; /* symbol rate in Symbols per second */ - fe_code_rate_t fec_inner; /* forward error correction (see above) */ -}; - -struct dvb_qam_parameters { - __u32 symbol_rate; /* symbol rate in Symbols per second */ - fe_code_rate_t fec_inner; /* forward error correction (see above) */ - fe_modulation_t modulation; /* modulation type (see above) */ -}; - -struct dvb_vsb_parameters { - fe_modulation_t modulation; /* modulation type (see above) */ -}; - -struct dvb_ofdm_parameters { - fe_bandwidth_t bandwidth; - fe_code_rate_t code_rate_HP; /* high priority stream code rate */ - fe_code_rate_t code_rate_LP; /* low priority stream code rate */ - fe_modulation_t constellation; /* modulation type (see above) */ - fe_transmit_mode_t transmission_mode; - fe_guard_interval_t guard_interval; - fe_hierarchy_t hierarchy_information; -}; - - -struct dvb_frontend_parameters { - __u32 frequency; /* (absolute) frequency in Hz for QAM/OFDM/ATSC */ - /* intermediate frequency in kHz for QPSK */ - fe_spectral_inversion_t inversion; - union { - struct dvb_qpsk_parameters qpsk; - struct dvb_qam_parameters qam; - struct dvb_ofdm_parameters ofdm; - struct dvb_vsb_parameters vsb; - } u; -}; - -struct dvb_frontend_event { - fe_status_t status; - struct dvb_frontend_parameters parameters; -}; -#endif - /* S2API Commands */ #define DTV_UNDEFINED 0 #define DTV_TUNE 1 @@ -582,6 +524,64 @@ struct dtv_properties { struct dtv_property *props; }; +#if defined(__DVB_CORE__) || !defined (__KERNEL__) + +enum fe_bandwidth { + BANDWIDTH_8_MHZ, + BANDWIDTH_7_MHZ, + BANDWIDTH_6_MHZ, + BANDWIDTH_AUTO, + BANDWIDTH_5_MHZ, + BANDWIDTH_10_MHZ, + BANDWIDTH_1_712_MHZ, +}; + +typedef enum fe_bandwidth fe_bandwidth_t; + +struct dvb_qpsk_parameters { + __u32 symbol_rate; /* symbol rate in Symbols per second */ + fe_code_rate_t fec_inner; /* forward error correction (see above) */ +}; + +struct dvb_qam_parameters { + __u32 symbol_rate; /* symbol rate in Symbols per second */ + fe_code_rate_t fec_inner; /* forward error correction (see above) */ + fe_modulation_t modulation; /* modulation type (see above) */ +}; + +struct dvb_vsb_parameters { + fe_modulation_t modulation; /* modulation type (see above) */ +}; + +struct dvb_ofdm_parameters { + fe_bandwidth_t bandwidth; + fe_code_rate_t code_rate_HP; /* high priority stream code rate */ + fe_code_rate_t code_rate_LP; /* low priority stream code rate */ + fe_modulation_t constellation; /* modulation type (see above) */ + fe_transmit_mode_t transmission_mode; + fe_guard_interval_t guard_interval; + fe_hierarchy_t hierarchy_information; 
+}; + + +struct dvb_frontend_parameters { + __u32 frequency; /* (absolute) frequency in Hz for QAM/OFDM/ATSC */ + /* intermediate frequency in kHz for QPSK */ + fe_spectral_inversion_t inversion; + union { + struct dvb_qpsk_parameters qpsk; + struct dvb_qam_parameters qam; + struct dvb_ofdm_parameters ofdm; + struct dvb_vsb_parameters vsb; + } u; +}; + +struct dvb_frontend_event { + fe_status_t status; + struct dvb_frontend_parameters parameters; +}; +#endif + #define FE_SET_PROPERTY _IOW('o', 82, struct dtv_properties) #define FE_GET_PROPERTY _IOR('o', 83, struct dtv_properties) -- cgit v1.2.3 From b1e28ffaebb96feb2d3a07be9468a52d0782d427 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Sun, 7 Jun 2015 15:01:15 -0300 Subject: [media] frontend: move legacy typedefs to the end Just userspace need those typedefs. So, put it in the compat part of the header. Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/dvb/frontend.h | 59 +++++++++++---------------------------- 1 file changed, 16 insertions(+), 43 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/dvb/frontend.h b/include/uapi/linux/dvb/frontend.h index 75605a7670a9..46c7fd1143a5 100644 --- a/include/uapi/linux/dvb/frontend.h +++ b/include/uapi/linux/dvb/frontend.h @@ -35,9 +35,6 @@ enum fe_type { FE_ATSC }; -typedef enum fe_type fe_type_t; - - enum fe_caps { FE_IS_STUPID = 0, FE_CAN_INVERSION_AUTO = 0x1, @@ -72,9 +69,6 @@ enum fe_caps { FE_CAN_MUTE_TS = 0x80000000 /* frontend can stop spurious TS data output */ }; -typedef enum fe_caps fe_caps_t; - - struct dvb_frontend_info { char name[128]; enum fe_type type; /* DEPRECATED. Use DTV_ENUM_DELSYS instead */ @@ -99,39 +93,28 @@ struct dvb_diseqc_master_cmd { __u8 msg_len; /* valid values are 3...6 */ }; - struct dvb_diseqc_slave_reply { __u8 msg [4]; /* { framing, data [3] } */ __u8 msg_len; /* valid values are 0...4, 0 means no msg */ int timeout; /* return from ioctl after timeout ms with */ }; /* errorcode when no message was received */ - enum fe_sec_voltage { SEC_VOLTAGE_13, SEC_VOLTAGE_18, SEC_VOLTAGE_OFF }; -typedef enum fe_sec_voltage fe_sec_voltage_t; - - enum fe_sec_tone_mode { SEC_TONE_ON, SEC_TONE_OFF }; -typedef enum fe_sec_tone_mode fe_sec_tone_mode_t; - - enum fe_sec_mini_cmd { SEC_MINI_A, SEC_MINI_B }; -typedef enum fe_sec_mini_cmd fe_sec_mini_cmd_t; - - /** * enum fe_status - enumerates the possible frontend status * @FE_HAS_SIGNAL: found something above the noise level @@ -143,7 +126,6 @@ typedef enum fe_sec_mini_cmd fe_sec_mini_cmd_t; * @FE_REINIT: frontend was reinitialized, application is recommended * to reset DiSEqC, tone and parameters */ - enum fe_status { FE_HAS_SIGNAL = 0x01, FE_HAS_CARRIER = 0x02, @@ -154,16 +136,12 @@ enum fe_status { FE_REINIT = 0x40, }; -typedef enum fe_status fe_status_t; - enum fe_spectral_inversion { INVERSION_OFF, INVERSION_ON, INVERSION_AUTO }; -typedef enum fe_spectral_inversion fe_spectral_inversion_t; - enum fe_code_rate { FEC_NONE = 0, FEC_1_2, @@ -180,9 +158,6 @@ enum fe_code_rate { FEC_2_5, }; -typedef enum fe_code_rate fe_code_rate_t; - - enum fe_modulation { QPSK, QAM_16, @@ -200,8 +175,6 @@ enum fe_modulation { QAM_4_NR, }; -typedef enum fe_modulation fe_modulation_t; - enum fe_transmit_mode { TRANSMISSION_MODE_2K, TRANSMISSION_MODE_8K, @@ -214,9 +187,6 @@ enum fe_transmit_mode { TRANSMISSION_MODE_C3780, }; -typedef enum fe_transmit_mode fe_transmit_mode_t; - - enum fe_guard_interval { GUARD_INTERVAL_1_32, GUARD_INTERVAL_1_16, @@ -231,8 +201,6 @@ enum fe_guard_interval { 
GUARD_INTERVAL_PN945, }; -typedef enum fe_guard_interval fe_guard_interval_t; - enum fe_hierarchy { HIERARCHY_NONE, HIERARCHY_1, @@ -241,8 +209,6 @@ enum fe_hierarchy { HIERARCHY_AUTO }; -typedef enum fe_hierarchy fe_hierarchy_t; - enum fe_interleaving { INTERLEAVING_NONE, INTERLEAVING_AUTO, @@ -349,8 +315,6 @@ enum fe_pilot { PILOT_AUTO, }; -typedef enum fe_pilot fe_pilot_t; - enum fe_rolloff { ROLLOFF_35, /* Implied value in DVB-S, default for DVB-S2 */ ROLLOFF_20, @@ -358,8 +322,6 @@ enum fe_rolloff { ROLLOFF_AUTO, }; -typedef enum fe_rolloff fe_rolloff_t; - enum fe_delivery_system { SYS_UNDEFINED, SYS_DVBC_ANNEX_A, @@ -382,8 +344,6 @@ enum fe_delivery_system { SYS_DVBC_ANNEX_C, }; -typedef enum fe_delivery_system fe_delivery_system_t; - /* backward compatibility */ #define SYS_DVBC_ANNEX_AC SYS_DVBC_ANNEX_A #define SYS_DMBTH SYS_DTMB /* DMB-TH is legacy name, use DTMB instead */ @@ -536,7 +496,23 @@ enum fe_bandwidth { BANDWIDTH_1_712_MHZ, }; +/* This is needed for legacy userspace support */ +typedef enum fe_sec_voltage fe_sec_voltage_t; +typedef enum fe_caps fe_caps_t; +typedef enum fe_type fe_type_t; +typedef enum fe_sec_tone_mode fe_sec_tone_mode_t; +typedef enum fe_sec_mini_cmd fe_sec_mini_cmd_t; +typedef enum fe_status fe_status_t; +typedef enum fe_spectral_inversion fe_spectral_inversion_t; +typedef enum fe_code_rate fe_code_rate_t; +typedef enum fe_modulation fe_modulation_t; +typedef enum fe_transmit_mode fe_transmit_mode_t; typedef enum fe_bandwidth fe_bandwidth_t; +typedef enum fe_guard_interval fe_guard_interval_t; +typedef enum fe_hierarchy fe_hierarchy_t; +typedef enum fe_pilot fe_pilot_t; +typedef enum fe_rolloff fe_rolloff_t; +typedef enum fe_delivery_system fe_delivery_system_t; struct dvb_qpsk_parameters { __u32 symbol_rate; /* symbol rate in Symbols per second */ @@ -563,7 +539,6 @@ struct dvb_ofdm_parameters { fe_hierarchy_t hierarchy_information; }; - struct dvb_frontend_parameters { __u32 frequency; /* (absolute) frequency in Hz for QAM/OFDM/ATSC */ /* intermediate frequency in kHz for QPSK */ @@ -585,7 +560,6 @@ struct dvb_frontend_event { #define FE_SET_PROPERTY _IOW('o', 82, struct dtv_properties) #define FE_GET_PROPERTY _IOR('o', 83, struct dtv_properties) - /** * When set, this flag will disable any zigzagging or other "normal" tuning * behaviour. 
Additionally, there will be no automatic monitoring of the lock @@ -595,7 +569,6 @@ struct dvb_frontend_event { */ #define FE_TUNE_MODE_ONESHOT 0x01 - #define FE_GET_INFO _IOR('o', 61, struct dvb_frontend_info) #define FE_DISEQC_RESET_OVERLOAD _IO('o', 62) -- cgit v1.2.3 From 76add03db97c11142e30d1e63c6f19e179b8b6c6 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Sun, 7 Jun 2015 16:28:33 -0300 Subject: [media] frontend: Fix a typo in the comments The description of struct dtv_stats has a small typo: FE_SCALE_DECIBELS instead of FE_SCALE_DECIBEL Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/dvb/frontend.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/dvb/frontend.h b/include/uapi/linux/dvb/frontend.h index 46c7fd1143a5..0380e62fc8b2 100644 --- a/include/uapi/linux/dvb/frontend.h +++ b/include/uapi/linux/dvb/frontend.h @@ -435,13 +435,13 @@ enum fecap_scale_params { * * In other words, for ISDB, those values should be filled like: * u.st.stat.svalue[0] = global statistics; - * u.st.stat.scale[0] = FE_SCALE_DECIBELS; + * u.st.stat.scale[0] = FE_SCALE_DECIBEL; * u.st.stat.value[1] = layer A statistics; * u.st.stat.scale[1] = FE_SCALE_NOT_AVAILABLE (if not available); * u.st.stat.svalue[2] = layer B statistics; - * u.st.stat.scale[2] = FE_SCALE_DECIBELS; + * u.st.stat.scale[2] = FE_SCALE_DECIBEL; * u.st.stat.svalue[3] = layer C statistics; - * u.st.stat.scale[3] = FE_SCALE_DECIBELS; + * u.st.stat.scale[3] = FE_SCALE_DECIBEL; * u.st.len = 4; */ struct dtv_stats { -- cgit v1.2.3 From 81a7c6d9bb14956d9feb619ae724aaa90ea55288 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Sun, 7 Jun 2015 16:33:30 -0300 Subject: [media] dvb: frontend.h: improve dvb_frontend_parameters comment The comment for struct dvb_frontend_parameters is weird, as it mixes a delivery system name (ATSC) with modulation names (QPSK, QAM, OFDM). Use delivery system names in the frequency comment, as this is clearer, especially after the second-generation delivery systems. While here, add comments to the union members, to make life easier for anyone trying to understand the convention used by the legacy API.
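For reference, a minimal DVBv3-style sketch of that convention, tuning a DVB-T channel through the legacy FE_SET_FRONTEND ioctl (the device path and channel values are illustrative assumptions, and error handling is omitted):

#include <fcntl.h>
#include <sys/ioctl.h>
#include <linux/dvb/frontend.h>

int tune_legacy_dvbt(void)
{
	struct dvb_frontend_parameters p = {
		.frequency = 474000000,			/* Hz for DVB-T */
		.inversion = INVERSION_AUTO,
		.u.ofdm = {				/* DVB-T member of the union */
			.bandwidth		= BANDWIDTH_8_MHZ,
			.code_rate_HP		= FEC_AUTO,
			.code_rate_LP		= FEC_AUTO,
			.constellation		= QAM_AUTO,
			.transmission_mode	= TRANSMISSION_MODE_AUTO,
			.guard_interval		= GUARD_INTERVAL_AUTO,
			.hierarchy_information	= HIERARCHY_AUTO,
		},
	};
	int fd = open("/dev/dvb/adapter0/frontend0", O_RDWR);

	if (fd < 0)
		return -1;
	return ioctl(fd, FE_SET_FRONTEND, &p);
}

A DVB-S tune would instead fill .u.qpsk and pass the intermediate frequency in kHz, a DVB-C tune .u.qam, and an ATSC tune .u.vsb, matching the comments added by this patch.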
Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/dvb/frontend.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/dvb/frontend.h b/include/uapi/linux/dvb/frontend.h index 0380e62fc8b2..e764fd8b7e35 100644 --- a/include/uapi/linux/dvb/frontend.h +++ b/include/uapi/linux/dvb/frontend.h @@ -540,14 +540,14 @@ struct dvb_ofdm_parameters { }; struct dvb_frontend_parameters { - __u32 frequency; /* (absolute) frequency in Hz for QAM/OFDM/ATSC */ - /* intermediate frequency in kHz for QPSK */ + __u32 frequency; /* (absolute) frequency in Hz for DVB-C/DVB-T/ATSC */ + /* intermediate frequency in kHz for DVB-S */ fe_spectral_inversion_t inversion; union { - struct dvb_qpsk_parameters qpsk; - struct dvb_qam_parameters qam; - struct dvb_ofdm_parameters ofdm; - struct dvb_vsb_parameters vsb; + struct dvb_qpsk_parameters qpsk; /* DVB-S */ + struct dvb_qam_parameters qam; /* DVB-C */ + struct dvb_ofdm_parameters ofdm; /* DVB-T */ + struct dvb_vsb_parameters vsb; /* ATSC */ } u; }; -- cgit v1.2.3 From 486ef85e93a4829e8f49d3bfe3ee3a29379868d3 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Sun, 7 Jun 2015 16:44:21 -0300 Subject: [media] dvb: frontend.h: add a note for the deprecated enums/structs Let be clear, at the header, about what got deprecated. Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/dvb/frontend.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/dvb/frontend.h b/include/uapi/linux/dvb/frontend.h index e764fd8b7e35..00a20cd21ee2 100644 --- a/include/uapi/linux/dvb/frontend.h +++ b/include/uapi/linux/dvb/frontend.h @@ -486,6 +486,12 @@ struct dtv_properties { #if defined(__DVB_CORE__) || !defined (__KERNEL__) +/* + * DEPRECATED: The DVBv3 ioctls, structs and enums should not be used on + * newer programs, as it doesn't support the second generation of digital + * TV standards, nor supports newer delivery systems. + */ + enum fe_bandwidth { BANDWIDTH_8_MHZ, BANDWIDTH_7_MHZ, -- cgit v1.2.3 From 6c72edab024757267f43de337f093203efea714b Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Sun, 7 Jun 2015 19:34:50 -0300 Subject: [media] dvb: dmx.h: don't use anonymous enums There are several anonymous enums here, used via a typedef. Well, we don't like typedefs on Kernel, so let's de-anonimize those enums. Then, latter, we may be able to get rid of the typedefs, at least from Kernelspace. Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/dvb/dmx.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/dvb/dmx.h b/include/uapi/linux/dvb/dmx.h index b4fb650d9d4f..ece3661a3cac 100644 --- a/include/uapi/linux/dvb/dmx.h +++ b/include/uapi/linux/dvb/dmx.h @@ -32,7 +32,7 @@ #define DMX_FILTER_SIZE 16 -typedef enum +typedef enum dmx_output { DMX_OUT_DECODER, /* Streaming directly to decoder. */ DMX_OUT_TAP, /* Output going to a memory buffer */ @@ -44,7 +44,7 @@ typedef enum } dmx_output_t; -typedef enum +typedef enum dmx_input { DMX_IN_FRONTEND, /* Input from a front-end device. */ DMX_IN_DVR /* Input from the logical DVR device. 
*/ @@ -122,7 +122,7 @@ typedef struct dmx_caps { int num_decoders; } dmx_caps_t; -typedef enum { +typedef enum dmx_source { DMX_SOURCE_FRONT0 = 0, DMX_SOURCE_FRONT1, DMX_SOURCE_FRONT2, -- cgit v1.2.3 From 6eab8043f9b91a998dc06ce0b770b9664e51bfc3 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Sun, 7 Jun 2015 19:29:43 -0300 Subject: [media] DocBook: Change format for enum dmx_output documentation Use a table for the Demux output. No new information added here. They were all merged inside the table. Signed-off-by: Mauro Carvalho Chehab --- Documentation/DocBook/media/Makefile | 13 +++---- Documentation/DocBook/media/dvb/demux.xml | 57 ++++++++++++++++++++----------- include/uapi/linux/dvb/dmx.h | 6 ++-- 3 files changed, 47 insertions(+), 29 deletions(-) (limited to 'include/uapi/linux') diff --git a/Documentation/DocBook/media/Makefile b/Documentation/DocBook/media/Makefile index e7d44a0b00d6..23996f88cd58 100644 --- a/Documentation/DocBook/media/Makefile +++ b/Documentation/DocBook/media/Makefile @@ -88,12 +88,8 @@ ENUMS = \ ENUM_DEFS = \ $(shell perl -e 'open IN,"cat @ARGV| cpp -fpreprocessed |"; while () { if ($$enum) {print "$$1\n" if (/\s*([A-Z]\S+)\b/); } $$enum = 0 if ($$enum && /^\}/); $$enum = 1 if(/^\s*enum\s/); }; close IN;' \ - $(srctree)/include/uapi/linux/dvb/audio.h \ - $(srctree)/include/uapi/linux/dvb/ca.h \ $(srctree)/include/uapi/linux/dvb/dmx.h \ - $(srctree)/include/uapi/linux/dvb/frontend.h \ - $(srctree)/include/uapi/linux/dvb/net.h \ - $(srctree)/include/uapi/linux/dvb/video.h) + $(srctree)/include/uapi/linux/dvb/frontend.h) STRUCTS = \ $(shell perl -ne 'print "$$1 " if /^struct\s+([^\s]+)\s+/' $(srctree)/include/uapi/linux/videodev2.h) \ @@ -251,9 +247,14 @@ $(MEDIA_OBJ_DIR)/dmx.h.xml: $(srctree)/include/uapi/linux/dvb/dmx.h $(MEDIA_OBJ_ @( \ echo "") > $@ @( \ + for ident in $(ENUM_DEFS) ; do \ + entity=`echo $$ident | tr _ -` ; \ + r="$$r s/([^\w\-])$$ident([^\w\-])/\1\&$$entity\;\2/g;";\ + done; \ expand --tabs=8 < $< | \ sed $(ESCAPE) $(DVB_DOCUMENTED) | \ - sed 's/i\.e\./&ie;/') >> $@ + sed 's/i\.e\./&ie;/' | \ + perl -ne "$$r print $$_;") >> $@ @( \ echo "") >> $@ diff --git a/Documentation/DocBook/media/dvb/demux.xml b/Documentation/DocBook/media/dvb/demux.xml index 11a831d58643..34f2fb1cd601 100644 --- a/Documentation/DocBook/media/dvb/demux.xml +++ b/Documentation/DocBook/media/dvb/demux.xml @@ -8,26 +8,43 @@ accessed by including linux/dvb/dmx.h in your application. Demux Data Types
-dmx_output_t - -typedef enum -{ - DMX_OUT_DECODER, /⋆ Streaming directly to decoder. ⋆/ - DMX_OUT_TAP, /⋆ Output going to a memory buffer ⋆/ - /⋆ (to be retrieved via the read command).⋆/ - DMX_OUT_TS_TAP, /⋆ Output multiplexed into a new TS ⋆/ - /⋆ (to be retrieved by reading from the ⋆/ - /⋆ logical DVR device). ⋆/ - DMX_OUT_TSDEMUX_TAP /⋆ Like TS_TAP but retrieved from the DMX device ⋆/ -} dmx_output_t; - -DMX_OUT_TAP delivers the stream output to the demux device on which the ioctl is -called. - -DMX_OUT_TS_TAP routes output to the logical DVR device /dev/dvb/adapter?/dvr?, -which delivers a TS multiplexed from all filters for which DMX_OUT_TS_TAP was -specified. - +Output for the demux + + + enum dmx_output + + &cs-def; + + + ID + Description + + + + + DMX_OUT_DECODER + Streaming directly to decoder. + + DMX_OUT_TAP + Output going to a memory buffer (to be retrieved via the + read command). Delivers the stream output to the demux + device on which the ioctl is called. + + DMX_OUT_TS_TAP + Output multiplexed into a new TS (to be retrieved by + reading from the logical DVR device). Routes output to the + logical DVR device /dev/dvb/adapter?/dvr?, + which delivers a TS multiplexed from all filters for which + DMX_OUT_TS_TAP was specified. + + DMX_OUT_TSDEMUX_TAP + Like &DMX-OUT-TS-TAP; but retrieved from the DMX + device. + + + +
+
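To make the table above concrete, here is a minimal sketch of selecting a demux output from userspace (the device path and PID are illustrative assumptions; error handling is omitted):

#include <fcntl.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/dvb/dmx.h>

int route_pid_to_dvr(void)
{
	struct dmx_pes_filter_params f;
	int fd = open("/dev/dvb/adapter0/demux0", O_RDWR);

	if (fd < 0)
		return -1;

	memset(&f, 0, sizeof(f));
	f.pid      = 0x100;			/* example PID */
	f.input    = DMX_IN_FRONTEND;
	f.output   = DMX_OUT_TS_TAP;		/* TS goes to /dev/dvb/adapter0/dvr0 */
	f.pes_type = DMX_PES_OTHER;
	f.flags    = DMX_IMMEDIATE_START;

	return ioctl(fd, DMX_SET_PES_FILTER, &f);
}

With DMX_OUT_TAP or DMX_OUT_TSDEMUX_TAP the data would instead be read() from the demux file descriptor itself, as described in the table.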
diff --git a/include/uapi/linux/dvb/dmx.h b/include/uapi/linux/dvb/dmx.h index ece3661a3cac..427e4899ed69 100644 --- a/include/uapi/linux/dvb/dmx.h +++ b/include/uapi/linux/dvb/dmx.h @@ -32,7 +32,7 @@ #define DMX_FILTER_SIZE 16 -typedef enum dmx_output +enum dmx_output { DMX_OUT_DECODER, /* Streaming directly to decoder. */ DMX_OUT_TAP, /* Output going to a memory buffer */ @@ -41,8 +41,9 @@ typedef enum dmx_output /* (to be retrieved by reading from the */ /* logical DVR device). */ DMX_OUT_TSDEMUX_TAP /* Like TS_TAP but retrieved from the DMX device */ -} dmx_output_t; +}; +typedef enum dmx_output dmx_output_t; typedef enum dmx_input { @@ -139,7 +140,6 @@ struct dmx_stc { __u64 stc; /* output: stc in 'base'*90 kHz units */ }; - #define DMX_START _IO('o', 41) #define DMX_STOP _IO('o', 42) #define DMX_SET_FILTER _IOW('o', 43, struct dmx_sct_filter_params) -- cgit v1.2.3 From 48a6092fb41fab5b80064c3fac786f8ec86457a3 Mon Sep 17 00:00:00 2001 From: Maxime Coquelin Date: Wed, 10 Jun 2015 21:19:36 +0200 Subject: serial: stm32-usart: Add STM32 USART Driver This drivers adds support to the STM32 USART controller, which is a standard serial driver. Tested-by: Chanwoo Choi Reviewed-by: Peter Hurley Reviewed-by: Vladimir Zapolskiy Reviewed-by: Andy Shevchenko Signed-off-by: Maxime Coquelin Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/Kconfig | 17 + drivers/tty/serial/Makefile | 1 + drivers/tty/serial/stm32-usart.c | 739 +++++++++++++++++++++++++++++++++++++++ include/uapi/linux/serial_core.h | 3 + 4 files changed, 760 insertions(+) create mode 100644 drivers/tty/serial/stm32-usart.c (limited to 'include/uapi/linux') diff --git a/drivers/tty/serial/Kconfig b/drivers/tty/serial/Kconfig index da45877e79fb..a74dabc8f108 100644 --- a/drivers/tty/serial/Kconfig +++ b/drivers/tty/serial/Kconfig @@ -1608,6 +1608,23 @@ config SERIAL_SPRD_CONSOLE with "earlycon" on the kernel command line. The console is enabled when early_param is processed. +config SERIAL_STM32 + tristate "STMicroelectronics STM32 serial port support" + select SERIAL_CORE + depends on ARM || COMPILE_TEST + help + This driver is for the on-chip Serial Controller on + STMicroelectronics STM32 MCUs. + USART supports Rx & Tx functionality. + It support all industry standard baud rates. + + If unsure, say N. 
+ +config SERIAL_STM32_CONSOLE + bool "Support for console on STM32" + depends on SERIAL_STM32=y + select SERIAL_CORE_CONSOLE + endmenu config SERIAL_MCTRL_GPIO diff --git a/drivers/tty/serial/Makefile b/drivers/tty/serial/Makefile index d296cee2e132..5ab41119b3dc 100644 --- a/drivers/tty/serial/Makefile +++ b/drivers/tty/serial/Makefile @@ -92,6 +92,7 @@ obj-$(CONFIG_SERIAL_FSL_LPUART) += fsl_lpuart.o obj-$(CONFIG_SERIAL_CONEXANT_DIGICOLOR) += digicolor-usart.o obj-$(CONFIG_SERIAL_MEN_Z135) += men_z135_uart.o obj-$(CONFIG_SERIAL_SPRD) += sprd_serial.o +obj-$(CONFIG_SERIAL_STM32) += stm32-usart.o # GPIOLIB helpers for modem control lines obj-$(CONFIG_SERIAL_MCTRL_GPIO) += serial_mctrl_gpio.o diff --git a/drivers/tty/serial/stm32-usart.c b/drivers/tty/serial/stm32-usart.c new file mode 100644 index 000000000000..4a6eab6da63e --- /dev/null +++ b/drivers/tty/serial/stm32-usart.c @@ -0,0 +1,739 @@ +/* + * Copyright (C) Maxime Coquelin 2015 + * Author: Maxime Coquelin + * License terms: GNU General Public License (GPL), version 2 + * + * Inspired by st-asc.c from STMicroelectronics (c) + */ + +#if defined(CONFIG_SERIAL_STM32_USART_CONSOLE) && defined(CONFIG_MAGIC_SYSRQ) +#define SUPPORT_SYSRQ +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define DRIVER_NAME "stm32-usart" + +/* Register offsets */ +#define USART_SR 0x00 +#define USART_DR 0x04 +#define USART_BRR 0x08 +#define USART_CR1 0x0c +#define USART_CR2 0x10 +#define USART_CR3 0x14 +#define USART_GTPR 0x18 + +/* USART_SR */ +#define USART_SR_PE BIT(0) +#define USART_SR_FE BIT(1) +#define USART_SR_NF BIT(2) +#define USART_SR_ORE BIT(3) +#define USART_SR_IDLE BIT(4) +#define USART_SR_RXNE BIT(5) +#define USART_SR_TC BIT(6) +#define USART_SR_TXE BIT(7) +#define USART_SR_LBD BIT(8) +#define USART_SR_CTS BIT(9) +#define USART_SR_ERR_MASK (USART_SR_LBD | USART_SR_ORE | \ + USART_SR_FE | USART_SR_PE) +/* Dummy bits */ +#define USART_SR_DUMMY_RX BIT(16) + +/* USART_DR */ +#define USART_DR_MASK GENMASK(8, 0) + +/* USART_BRR */ +#define USART_BRR_DIV_F_MASK GENMASK(3, 0) +#define USART_BRR_DIV_M_MASK GENMASK(15, 4) +#define USART_BRR_DIV_M_SHIFT 4 + +/* USART_CR1 */ +#define USART_CR1_SBK BIT(0) +#define USART_CR1_RWU BIT(1) +#define USART_CR1_RE BIT(2) +#define USART_CR1_TE BIT(3) +#define USART_CR1_IDLEIE BIT(4) +#define USART_CR1_RXNEIE BIT(5) +#define USART_CR1_TCIE BIT(6) +#define USART_CR1_TXEIE BIT(7) +#define USART_CR1_PEIE BIT(8) +#define USART_CR1_PS BIT(9) +#define USART_CR1_PCE BIT(10) +#define USART_CR1_WAKE BIT(11) +#define USART_CR1_M BIT(12) +#define USART_CR1_UE BIT(13) +#define USART_CR1_OVER8 BIT(15) +#define USART_CR1_IE_MASK GENMASK(8, 4) + +/* USART_CR2 */ +#define USART_CR2_ADD_MASK GENMASK(3, 0) +#define USART_CR2_LBDL BIT(5) +#define USART_CR2_LBDIE BIT(6) +#define USART_CR2_LBCL BIT(8) +#define USART_CR2_CPHA BIT(9) +#define USART_CR2_CPOL BIT(10) +#define USART_CR2_CLKEN BIT(11) +#define USART_CR2_STOP_2B BIT(13) +#define USART_CR2_STOP_MASK GENMASK(13, 12) +#define USART_CR2_LINEN BIT(14) + +/* USART_CR3 */ +#define USART_CR3_EIE BIT(0) +#define USART_CR3_IREN BIT(1) +#define USART_CR3_IRLP BIT(2) +#define USART_CR3_HDSEL BIT(3) +#define USART_CR3_NACK BIT(4) +#define USART_CR3_SCEN BIT(5) +#define USART_CR3_DMAR BIT(6) +#define USART_CR3_DMAT BIT(7) +#define USART_CR3_RTSE BIT(8) +#define USART_CR3_CTSE BIT(9) +#define USART_CR3_CTSIE BIT(10) +#define USART_CR3_ONEBIT BIT(11) + +/* USART_GTPR */ 
+#define USART_GTPR_PSC_MASK GENMASK(7, 0) +#define USART_GTPR_GT_MASK GENMASK(15, 8) + +#define DRIVER_NAME "stm32-usart" +#define STM32_SERIAL_NAME "ttyS" +#define STM32_MAX_PORTS 6 + +struct stm32_port { + struct uart_port port; + struct clk *clk; + bool hw_flow_control; +}; + +static struct stm32_port stm32_ports[STM32_MAX_PORTS]; +static struct uart_driver stm32_usart_driver; + +static void stm32_stop_tx(struct uart_port *port); + +static inline struct stm32_port *to_stm32_port(struct uart_port *port) +{ + return container_of(port, struct stm32_port, port); +} + +static void stm32_set_bits(struct uart_port *port, u32 reg, u32 bits) +{ + u32 val; + + val = readl_relaxed(port->membase + reg); + val |= bits; + writel_relaxed(val, port->membase + reg); +} + +static void stm32_clr_bits(struct uart_port *port, u32 reg, u32 bits) +{ + u32 val; + + val = readl_relaxed(port->membase + reg); + val &= ~bits; + writel_relaxed(val, port->membase + reg); +} + +static void stm32_receive_chars(struct uart_port *port) +{ + struct tty_port *tport = &port->state->port; + unsigned long c; + u32 sr; + char flag; + + if (port->irq_wake) + pm_wakeup_event(tport->tty->dev, 0); + + while ((sr = readl_relaxed(port->membase + USART_SR)) & USART_SR_RXNE) { + sr |= USART_SR_DUMMY_RX; + c = readl_relaxed(port->membase + USART_DR); + flag = TTY_NORMAL; + port->icount.rx++; + + if (sr & USART_SR_ERR_MASK) { + if (sr & USART_SR_LBD) { + port->icount.brk++; + if (uart_handle_break(port)) + continue; + } else if (sr & USART_SR_ORE) { + port->icount.overrun++; + } else if (sr & USART_SR_PE) { + port->icount.parity++; + } else if (sr & USART_SR_FE) { + port->icount.frame++; + } + + sr &= port->read_status_mask; + + if (sr & USART_SR_LBD) + flag = TTY_BREAK; + else if (sr & USART_SR_PE) + flag = TTY_PARITY; + else if (sr & USART_SR_FE) + flag = TTY_FRAME; + } + + if (uart_handle_sysrq_char(port, c)) + continue; + uart_insert_char(port, sr, USART_SR_ORE, c, flag); + } + + spin_unlock(&port->lock); + tty_flip_buffer_push(tport); + spin_lock(&port->lock); +} + +static void stm32_transmit_chars(struct uart_port *port) +{ + struct circ_buf *xmit = &port->state->xmit; + + if (port->x_char) { + writel_relaxed(port->x_char, port->membase + USART_DR); + port->x_char = 0; + port->icount.tx++; + return; + } + + if (uart_tx_stopped(port)) { + stm32_stop_tx(port); + return; + } + + if (uart_circ_empty(xmit)) { + stm32_stop_tx(port); + return; + } + + writel_relaxed(xmit->buf[xmit->tail], port->membase + USART_DR); + xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1); + port->icount.tx++; + + if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS) + uart_write_wakeup(port); + + if (uart_circ_empty(xmit)) + stm32_stop_tx(port); +} + +static irqreturn_t stm32_interrupt(int irq, void *ptr) +{ + struct uart_port *port = ptr; + u32 sr; + + spin_lock(&port->lock); + + sr = readl_relaxed(port->membase + USART_SR); + + if (sr & USART_SR_RXNE) + stm32_receive_chars(port); + + if (sr & USART_SR_TXE) + stm32_transmit_chars(port); + + spin_unlock(&port->lock); + + return IRQ_HANDLED; +} + +static unsigned int stm32_tx_empty(struct uart_port *port) +{ + return readl_relaxed(port->membase + USART_SR) & USART_SR_TXE; +} + +static void stm32_set_mctrl(struct uart_port *port, unsigned int mctrl) +{ + if ((mctrl & TIOCM_RTS) && (port->status & UPSTAT_AUTORTS)) + stm32_set_bits(port, USART_CR3, USART_CR3_RTSE); + else + stm32_clr_bits(port, USART_CR3, USART_CR3_RTSE); +} + +static unsigned int stm32_get_mctrl(struct uart_port *port) +{ + /* This routine is 
used to get signals of: DCD, DSR, RI, and CTS */ + return TIOCM_CAR | TIOCM_DSR | TIOCM_CTS; +} + +/* Transmit stop */ +static void stm32_stop_tx(struct uart_port *port) +{ + stm32_clr_bits(port, USART_CR1, USART_CR1_TXEIE); +} + +/* There are probably characters waiting to be transmitted. */ +static void stm32_start_tx(struct uart_port *port) +{ + struct circ_buf *xmit = &port->state->xmit; + + if (uart_circ_empty(xmit)) + return; + + stm32_set_bits(port, USART_CR1, USART_CR1_TXEIE | USART_CR1_TE); +} + +/* Throttle the remote when input buffer is about to overflow. */ +static void stm32_throttle(struct uart_port *port) +{ + unsigned long flags; + + spin_lock_irqsave(&port->lock, flags); + stm32_clr_bits(port, USART_CR1, USART_CR1_RXNEIE); + spin_unlock_irqrestore(&port->lock, flags); +} + +/* Unthrottle the remote, the input buffer can now accept data. */ +static void stm32_unthrottle(struct uart_port *port) +{ + unsigned long flags; + + spin_lock_irqsave(&port->lock, flags); + stm32_set_bits(port, USART_CR1, USART_CR1_RXNEIE); + spin_unlock_irqrestore(&port->lock, flags); +} + +/* Receive stop */ +static void stm32_stop_rx(struct uart_port *port) +{ + stm32_clr_bits(port, USART_CR1, USART_CR1_RXNEIE); +} + +/* Handle breaks - ignored by us */ +static void stm32_break_ctl(struct uart_port *port, int break_state) +{ +} + +static int stm32_startup(struct uart_port *port) +{ + const char *name = to_platform_device(port->dev)->name; + u32 val; + int ret; + + ret = request_irq(port->irq, stm32_interrupt, IRQF_NO_SUSPEND, + name, port); + if (ret) + return ret; + + val = USART_CR1_RXNEIE | USART_CR1_TE | USART_CR1_RE; + stm32_set_bits(port, USART_CR1, val); + + return 0; +} + +static void stm32_shutdown(struct uart_port *port) +{ + u32 val; + + val = USART_CR1_TXEIE | USART_CR1_RXNEIE | USART_CR1_TE | USART_CR1_RE; + stm32_set_bits(port, USART_CR1, val); + + free_irq(port->irq, port); +} + +static void stm32_set_termios(struct uart_port *port, struct ktermios *termios, + struct ktermios *old) +{ + struct stm32_port *stm32_port = to_stm32_port(port); + unsigned int baud; + u32 usartdiv, mantissa, fraction, oversampling; + tcflag_t cflag = termios->c_cflag; + u32 cr1, cr2, cr3; + unsigned long flags; + + if (!stm32_port->hw_flow_control) + cflag &= ~CRTSCTS; + + baud = uart_get_baud_rate(port, termios, old, 0, port->uartclk / 8); + + spin_lock_irqsave(&port->lock, flags); + + /* Stop serial port and reset value */ + writel_relaxed(0, port->membase + USART_CR1); + + cr1 = USART_CR1_TE | USART_CR1_RE | USART_CR1_UE | USART_CR1_RXNEIE; + cr2 = 0; + cr3 = 0; + + if (cflag & CSTOPB) + cr2 |= USART_CR2_STOP_2B; + + if (cflag & PARENB) { + cr1 |= USART_CR1_PCE; + if ((cflag & CSIZE) == CS8) + cr1 |= USART_CR1_M; + } + + if (cflag & PARODD) + cr1 |= USART_CR1_PS; + + port->status &= ~(UPSTAT_AUTOCTS | UPSTAT_AUTORTS); + if (cflag & CRTSCTS) { + port->status |= UPSTAT_AUTOCTS | UPSTAT_AUTORTS; + cr3 |= USART_CR3_CTSE; + } + + usartdiv = DIV_ROUND_CLOSEST(port->uartclk, baud); + + /* + * The USART supports 16 or 8 times oversampling. + * By default we prefer 16 times oversampling, so that the receiver + * has a better tolerance to clock deviations. + * 8 times oversampling is only used to achieve higher speeds. 
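+ * As an illustration (figures not taken from this patch): with a 16 MHz
+ * uartclk and a requested 115200 baud, usartdiv = 139, so 16x oversampling
+ * is kept; the divider mantissa is 139 / 16 = 8, the fraction 139 % 16 = 11,
+ * and BRR is written as (8 << 4) | 11 = 0x8b, giving an actual rate of
+ * about 115108 baud (~0.08% error).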
+ */ + if (usartdiv < 16) { + oversampling = 8; + stm32_set_bits(port, USART_CR1, USART_CR1_OVER8); + } else { + oversampling = 16; + stm32_clr_bits(port, USART_CR1, USART_CR1_OVER8); + } + + mantissa = (usartdiv / oversampling) << USART_BRR_DIV_M_SHIFT; + fraction = usartdiv % oversampling; + writel_relaxed(mantissa | fraction, port->membase + USART_BRR); + + uart_update_timeout(port, cflag, baud); + + port->read_status_mask = USART_SR_ORE; + if (termios->c_iflag & INPCK) + port->read_status_mask |= USART_SR_PE | USART_SR_FE; + if (termios->c_iflag & (IGNBRK | BRKINT | PARMRK)) + port->read_status_mask |= USART_SR_LBD; + + /* Characters to ignore */ + port->ignore_status_mask = 0; + if (termios->c_iflag & IGNPAR) + port->ignore_status_mask = USART_SR_PE | USART_SR_FE; + if (termios->c_iflag & IGNBRK) { + port->ignore_status_mask |= USART_SR_LBD; + /* + * If we're ignoring parity and break indicators, + * ignore overruns too (for real raw support). + */ + if (termios->c_iflag & IGNPAR) + port->ignore_status_mask |= USART_SR_ORE; + } + + /* Ignore all characters if CREAD is not set */ + if ((termios->c_cflag & CREAD) == 0) + port->ignore_status_mask |= USART_SR_DUMMY_RX; + + writel_relaxed(cr3, port->membase + USART_CR3); + writel_relaxed(cr2, port->membase + USART_CR2); + writel_relaxed(cr1, port->membase + USART_CR1); + + spin_unlock_irqrestore(&port->lock, flags); +} + +static const char *stm32_type(struct uart_port *port) +{ + return (port->type == PORT_STM32) ? DRIVER_NAME : NULL; +} + +static void stm32_release_port(struct uart_port *port) +{ +} + +static int stm32_request_port(struct uart_port *port) +{ + return 0; +} + +static void stm32_config_port(struct uart_port *port, int flags) +{ + if (flags & UART_CONFIG_TYPE) + port->type = PORT_STM32; +} + +static int +stm32_verify_port(struct uart_port *port, struct serial_struct *ser) +{ + /* No user changeable parameters */ + return -EINVAL; +} + +static void stm32_pm(struct uart_port *port, unsigned int state, + unsigned int oldstate) +{ + struct stm32_port *stm32port = container_of(port, + struct stm32_port, port); + unsigned long flags = 0; + + switch (state) { + case UART_PM_STATE_ON: + clk_prepare_enable(stm32port->clk); + break; + case UART_PM_STATE_OFF: + spin_lock_irqsave(&port->lock, flags); + stm32_clr_bits(port, USART_CR1, USART_CR1_UE); + spin_unlock_irqrestore(&port->lock, flags); + clk_disable_unprepare(stm32port->clk); + break; + } +} + +static const struct uart_ops stm32_uart_ops = { + .tx_empty = stm32_tx_empty, + .set_mctrl = stm32_set_mctrl, + .get_mctrl = stm32_get_mctrl, + .stop_tx = stm32_stop_tx, + .start_tx = stm32_start_tx, + .throttle = stm32_throttle, + .unthrottle = stm32_unthrottle, + .stop_rx = stm32_stop_rx, + .break_ctl = stm32_break_ctl, + .startup = stm32_startup, + .shutdown = stm32_shutdown, + .set_termios = stm32_set_termios, + .pm = stm32_pm, + .type = stm32_type, + .release_port = stm32_release_port, + .request_port = stm32_request_port, + .config_port = stm32_config_port, + .verify_port = stm32_verify_port, +}; + +static int stm32_init_port(struct stm32_port *stm32port, + struct platform_device *pdev) +{ + struct uart_port *port = &stm32port->port; + struct resource *res; + int ret; + + port->iotype = UPIO_MEM; + port->flags = UPF_BOOT_AUTOCONF; + port->ops = &stm32_uart_ops; + port->dev = &pdev->dev; + port->irq = platform_get_irq(pdev, 0); + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + port->membase = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(port->membase)) + return 
PTR_ERR(port->membase); + port->mapbase = res->start; + + spin_lock_init(&port->lock); + + stm32port->clk = devm_clk_get(&pdev->dev, NULL); + if (IS_ERR(stm32port->clk)) + return PTR_ERR(stm32port->clk); + + /* Ensure that clk rate is correct by enabling the clk */ + ret = clk_prepare_enable(stm32port->clk); + if (ret) + return ret; + + stm32port->port.uartclk = clk_get_rate(stm32port->clk); + if (!stm32port->port.uartclk) + ret = -EINVAL; + + clk_disable_unprepare(stm32port->clk); + + return ret; +} + +static struct stm32_port *stm32_of_get_stm32_port(struct platform_device *pdev) +{ + struct device_node *np = pdev->dev.of_node; + int id; + + if (!np) + return NULL; + + id = of_alias_get_id(np, "serial"); + if (id < 0) + id = 0; + + if (WARN_ON(id >= STM32_MAX_PORTS)) + return NULL; + + stm32_ports[id].hw_flow_control = of_property_read_bool(np, + "auto-flow-control"); + stm32_ports[id].port.line = id; + return &stm32_ports[id]; +} + +#ifdef CONFIG_OF +static const struct of_device_id stm32_match[] = { + { .compatible = "st,stm32-usart", }, + { .compatible = "st,stm32-uart", }, + {}, +}; + +MODULE_DEVICE_TABLE(of, stm32_match); +#endif + +static int stm32_serial_probe(struct platform_device *pdev) +{ + int ret; + struct stm32_port *stm32port; + + stm32port = stm32_of_get_stm32_port(pdev); + if (!stm32port) + return -ENODEV; + + ret = stm32_init_port(stm32port, pdev); + if (ret) + return ret; + + ret = uart_add_one_port(&stm32_usart_driver, &stm32port->port); + if (ret) + return ret; + + platform_set_drvdata(pdev, &stm32port->port); + + return 0; +} + +static int stm32_serial_remove(struct platform_device *pdev) +{ + struct uart_port *port = platform_get_drvdata(pdev); + + return uart_remove_one_port(&stm32_usart_driver, port); +} + + +#ifdef CONFIG_SERIAL_STM32_CONSOLE +static void stm32_console_putchar(struct uart_port *port, int ch) +{ + while (!(readl_relaxed(port->membase + USART_SR) & USART_SR_TXE)) + cpu_relax(); + + writel_relaxed(ch, port->membase + USART_DR); +} + +static void stm32_console_write(struct console *co, const char *s, unsigned cnt) +{ + struct uart_port *port = &stm32_ports[co->index].port; + unsigned long flags; + u32 old_cr1, new_cr1; + int locked = 1; + + local_irq_save(flags); + if (port->sysrq) + locked = 0; + else if (oops_in_progress) + locked = spin_trylock(&port->lock); + else + spin_lock(&port->lock); + + /* Save and disable interrupts */ + old_cr1 = readl_relaxed(port->membase + USART_CR1); + new_cr1 = old_cr1 & ~USART_CR1_IE_MASK; + writel_relaxed(new_cr1, port->membase + USART_CR1); + + uart_console_write(port, s, cnt, stm32_console_putchar); + + /* Restore interrupt state */ + writel_relaxed(old_cr1, port->membase + USART_CR1); + + if (locked) + spin_unlock(&port->lock); + local_irq_restore(flags); +} + +static int stm32_console_setup(struct console *co, char *options) +{ + struct stm32_port *stm32port; + int baud = 9600; + int bits = 8; + int parity = 'n'; + int flow = 'n'; + + if (co->index >= STM32_MAX_PORTS) + return -ENODEV; + + stm32port = &stm32_ports[co->index]; + + /* + * This driver does not support early console initialization + * (use ARM early printk support instead), so we only expect + * this to be called during the uart port registration when the + * driver gets probed and the port should be mapped at that point. 
+ */ + if (stm32port->port.mapbase == 0 || stm32port->port.membase == NULL) + return -ENXIO; + + if (options) + uart_parse_options(options, &baud, &parity, &bits, &flow); + + return uart_set_options(&stm32port->port, co, baud, parity, bits, flow); +} + +static struct console stm32_console = { + .name = STM32_SERIAL_NAME, + .device = uart_console_device, + .write = stm32_console_write, + .setup = stm32_console_setup, + .flags = CON_PRINTBUFFER, + .index = -1, + .data = &stm32_usart_driver, +}; + +#define STM32_SERIAL_CONSOLE (&stm32_console) + +#else +#define STM32_SERIAL_CONSOLE NULL +#endif /* CONFIG_SERIAL_STM32_CONSOLE */ + +static struct uart_driver stm32_usart_driver = { + .driver_name = DRIVER_NAME, + .dev_name = STM32_SERIAL_NAME, + .major = 0, + .minor = 0, + .nr = STM32_MAX_PORTS, + .cons = STM32_SERIAL_CONSOLE, +}; + +static struct platform_driver stm32_serial_driver = { + .probe = stm32_serial_probe, + .remove = stm32_serial_remove, + .driver = { + .name = DRIVER_NAME, + .of_match_table = of_match_ptr(stm32_match), + }, +}; + +static int __init usart_init(void) +{ + static char banner[] __initdata = "STM32 USART driver initialized"; + int ret; + + pr_info("%s\n", banner); + + ret = uart_register_driver(&stm32_usart_driver); + if (ret) + return ret; + + ret = platform_driver_register(&stm32_serial_driver); + if (ret) + uart_unregister_driver(&stm32_usart_driver); + + return ret; +} + +static void __exit usart_exit(void) +{ + platform_driver_unregister(&stm32_serial_driver); + uart_unregister_driver(&stm32_usart_driver); +} + +module_init(usart_init); +module_exit(usart_exit); + +MODULE_ALIAS("platform:" DRIVER_NAME); +MODULE_DESCRIPTION("STMicroelectronics STM32 serial port driver"); +MODULE_LICENSE("GPL v2"); diff --git a/include/uapi/linux/serial_core.h b/include/uapi/linux/serial_core.h index b2122813f18a..93ba148f923e 100644 --- a/include/uapi/linux/serial_core.h +++ b/include/uapi/linux/serial_core.h @@ -258,4 +258,7 @@ /* Cris v10 / v32 SoC */ #define PORT_CRIS 112 +/* STM32 USART */ +#define PORT_STM32 113 + #endif /* _UAPILINUX_SERIAL_CORE_H */ -- cgit v1.2.3 From 2157e7b82f3b81f57bd80cd67cef09ef26e5f74c Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Fri, 5 Jun 2015 16:35:25 +1000 Subject: vfio: powerpc/spapr: Register memory and define IOMMU v2 The existing implementation accounts the whole DMA window in the locked_vm counter. This is going to be worse with multiple containers and huge DMA windows. Also, real-time accounting would requite additional tracking of accounted pages due to the page size difference - IOMMU uses 4K pages and system uses 4K or 64K pages. Another issue is that actual pages pinning/unpinning happens on every DMA map/unmap request. This does not affect the performance much now as we spend way too much time now on switching context between guest/userspace/host but this will start to matter when we add in-kernel DMA map/unmap acceleration. This introduces a new IOMMU type for SPAPR - VFIO_SPAPR_TCE_v2_IOMMU. New IOMMU deprecates VFIO_IOMMU_ENABLE/VFIO_IOMMU_DISABLE and introduces 2 new ioctls to register/unregister DMA memory - VFIO_IOMMU_SPAPR_REGISTER_MEMORY and VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY - which receive user space address and size of a memory region which needs to be pinned/unpinned and counted in locked_vm. New IOMMU splits physical pages pinning and TCE table update into 2 different operations. It requires: 1) guest pages to be registered first 2) consequent map/unmap requests to work only with pre-registered memory. 
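(A rough userspace sketch of that two-step flow; the helper name, buffer and iova are assumptions for illustration, the container fd is assumed to have already been set to VFIO_SPAPR_TCE_v2_IOMMU, and error handling is minimal:)

#include <sys/ioctl.h>
#include <linux/vfio.h>

static int spapr_v2_map(int container, void *buf, unsigned long size,
			unsigned long iova)
{
	struct vfio_iommu_spapr_register_memory reg = {
		.argsz = sizeof(reg),
		.vaddr = (__u64)(unsigned long)buf,
		.size  = size,
	};
	struct vfio_iommu_type1_dma_map map = {
		.argsz = sizeof(map),
		.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE,
		.vaddr = (__u64)(unsigned long)buf,
		.iova  = iova,
		.size  = size,
	};

	/* 1) pin the pages and charge locked_vm, once per registered region */
	if (ioctl(container, VFIO_IOMMU_SPAPR_REGISTER_MEMORY, &reg))
		return -1;

	/* 2) map/unmap now only updates the TCE table for pre-registered memory */
	return ioctl(container, VFIO_IOMMU_MAP_DMA, &map);
}

Unregistration must later be issued with the same vaddr and size used here, as the documentation hunk below notes.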
For the default single window case this means that the entire guest (instead of 2GB) needs to be pinned before using VFIO. When a huge DMA window is added, no additional pinning will be required, otherwise it would be guest RAM + 2GB. The new memory registration ioctls are not supported by VFIO_SPAPR_TCE_IOMMU. Dynamic DMA window and in-kernel acceleration will require memory to be preregistered in order to work. The accounting is done per the user process. This advertises v2 SPAPR TCE IOMMU and restricts what the userspace can do with v1 or v2 IOMMUs. In order to support memory pre-registration, we need a way to track the use of every registered memory region and only allow unregistration if a region is not in use anymore. So we need a way to tell from what region the just cleared TCE was from. This adds a userspace view of the TCE table into iommu_table struct. It contains userspace address, one per TCE entry. The table is only allocated when the ownership over an IOMMU group is taken which means it is only used from outside of the powernv code (such as VFIO). As v2 IOMMU supports IODA2 and pre-IODA2 IOMMUs (which do not support DDW API), this creates a default DMA window for IODA2 for consistency. Signed-off-by: Alexey Kardashevskiy [aw: for the vfio related changes] Acked-by: Alex Williamson Reviewed-by: David Gibson Signed-off-by: Michael Ellerman --- Documentation/vfio.txt | 31 ++- arch/powerpc/include/asm/iommu.h | 6 + drivers/vfio/vfio_iommu_spapr_tce.c | 501 ++++++++++++++++++++++++++++++------ include/uapi/linux/vfio.h | 27 ++ 4 files changed, 482 insertions(+), 83 deletions(-) (limited to 'include/uapi/linux') diff --git a/Documentation/vfio.txt b/Documentation/vfio.txt index 4c746a7e717a..dcc37e109c68 100644 --- a/Documentation/vfio.txt +++ b/Documentation/vfio.txt @@ -289,10 +289,12 @@ PPC64 sPAPR implementation note This implementation has some specifics: -1) Only one IOMMU group per container is supported as an IOMMU group -represents the minimal entity which isolation can be guaranteed for and -groups are allocated statically, one per a Partitionable Endpoint (PE) +1) On older systems (POWER7 with P5IOC2/IODA1) only one IOMMU group per +container is supported as an IOMMU table is allocated at the boot time, +one table per a IOMMU group which is a Partitionable Endpoint (PE) (PE is often a PCI domain but not always). +Newer systems (POWER8 with IODA2) have improved hardware design which allows +to remove this limitation and have multiple IOMMU groups per a VFIO container. 2) The hardware supports so called DMA windows - the PCI address range within which DMA transfer is allowed, any attempt to access address space @@ -439,6 +441,29 @@ The code flow from the example above should be slightly changed: .... +5) There is v2 of SPAPR TCE IOMMU. It deprecates VFIO_IOMMU_ENABLE/ +VFIO_IOMMU_DISABLE and implements 2 new ioctls: +VFIO_IOMMU_SPAPR_REGISTER_MEMORY and VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY +(which are unsupported in v1 IOMMU). + +PPC64 paravirtualized guests generate a lot of map/unmap requests, +and the handling of those includes pinning/unpinning pages and updating +mm::locked_vm counter to make sure we do not exceed the rlimit. +The v2 IOMMU splits accounting and pinning into separate operations: + +- VFIO_IOMMU_SPAPR_REGISTER_MEMORY/VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY ioctls +receive a user space address and size of the block to be pinned. 
+Bisecting is not supported and VFIO_IOMMU_UNREGISTER_MEMORY is expected to +be called with the exact address and size used for registering +the memory block. The userspace is not expected to call these often. +The ranges are stored in a linked list in a VFIO container. + +- VFIO_IOMMU_MAP_DMA/VFIO_IOMMU_UNMAP_DMA ioctls only update the actual +IOMMU table and do not do pinning; instead these check that the userspace +address is from pre-registered range. + +This separation helps in optimizing DMA for guests. + ------------------------------------------------------------------------------- [1] VFIO was originally an acronym for "Virtual Function I/O" in its diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h index 9d3749287689..f9957eb4c659 100644 --- a/arch/powerpc/include/asm/iommu.h +++ b/arch/powerpc/include/asm/iommu.h @@ -112,9 +112,15 @@ struct iommu_table { unsigned long *it_map; /* A simple allocation bitmap for now */ unsigned long it_page_shift;/* table iommu page size */ struct list_head it_group_list;/* List of iommu_table_group_link */ + unsigned long *it_userspace; /* userspace view of the table */ struct iommu_table_ops *it_ops; }; +#define IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry) \ + ((tbl)->it_userspace ? \ + &((tbl)->it_userspace[(entry) - (tbl)->it_offset]) : \ + NULL) + /* Pure 2^n version of get_order */ static inline __attribute_const__ int get_iommu_order(unsigned long size, struct iommu_table *tbl) diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c index 203caacf2242..91a32239bd0a 100644 --- a/drivers/vfio/vfio_iommu_spapr_tce.c +++ b/drivers/vfio/vfio_iommu_spapr_tce.c @@ -19,8 +19,10 @@ #include #include #include +#include #include #include +#include #define DRIVER_VERSION "0.1" #define DRIVER_AUTHOR "aik@ozlabs.ru" @@ -81,6 +83,11 @@ static void decrement_locked_vm(long npages) * into DMA'ble space using the IOMMU */ +struct tce_iommu_group { + struct list_head next; + struct iommu_group *grp; +}; + /* * The container descriptor supports only a single group per container. 
* Required by the API as the container is not supplied with the IOMMU group @@ -88,11 +95,84 @@ static void decrement_locked_vm(long npages) */ struct tce_container { struct mutex lock; - struct iommu_group *grp; bool enabled; + bool v2; unsigned long locked_pages; + struct iommu_table *tables[IOMMU_TABLE_GROUP_MAX_TABLES]; + struct list_head group_list; }; +static long tce_iommu_unregister_pages(struct tce_container *container, + __u64 vaddr, __u64 size) +{ + struct mm_iommu_table_group_mem_t *mem; + + if ((vaddr & ~PAGE_MASK) || (size & ~PAGE_MASK)) + return -EINVAL; + + mem = mm_iommu_find(vaddr, size >> PAGE_SHIFT); + if (!mem) + return -ENOENT; + + return mm_iommu_put(mem); +} + +static long tce_iommu_register_pages(struct tce_container *container, + __u64 vaddr, __u64 size) +{ + long ret = 0; + struct mm_iommu_table_group_mem_t *mem = NULL; + unsigned long entries = size >> PAGE_SHIFT; + + if ((vaddr & ~PAGE_MASK) || (size & ~PAGE_MASK) || + ((vaddr + size) < vaddr)) + return -EINVAL; + + ret = mm_iommu_get(vaddr, entries, &mem); + if (ret) + return ret; + + container->enabled = true; + + return 0; +} + +static long tce_iommu_userspace_view_alloc(struct iommu_table *tbl) +{ + unsigned long cb = _ALIGN_UP(sizeof(tbl->it_userspace[0]) * + tbl->it_size, PAGE_SIZE); + unsigned long *uas; + long ret; + + BUG_ON(tbl->it_userspace); + + ret = try_increment_locked_vm(cb >> PAGE_SHIFT); + if (ret) + return ret; + + uas = vzalloc(cb); + if (!uas) { + decrement_locked_vm(cb >> PAGE_SHIFT); + return -ENOMEM; + } + tbl->it_userspace = uas; + + return 0; +} + +static void tce_iommu_userspace_view_free(struct iommu_table *tbl) +{ + unsigned long cb = _ALIGN_UP(sizeof(tbl->it_userspace[0]) * + tbl->it_size, PAGE_SIZE); + + if (!tbl->it_userspace) + return; + + vfree(tbl->it_userspace); + tbl->it_userspace = NULL; + decrement_locked_vm(cb >> PAGE_SHIFT); +} + static bool tce_page_is_contained(struct page *page, unsigned page_shift) { /* @@ -103,18 +183,18 @@ static bool tce_page_is_contained(struct page *page, unsigned page_shift) return (PAGE_SHIFT + compound_order(compound_head(page))) >= page_shift; } +static inline bool tce_groups_attached(struct tce_container *container) +{ + return !list_empty(&container->group_list); +} + static long tce_iommu_find_table(struct tce_container *container, phys_addr_t ioba, struct iommu_table **ptbl) { long i; - struct iommu_table_group *table_group; - - table_group = iommu_group_get_iommudata(container->grp); - if (!table_group) - return -1; for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) { - struct iommu_table *tbl = table_group->tables[i]; + struct iommu_table *tbl = container->tables[i]; if (tbl) { unsigned long entry = ioba >> tbl->it_page_shift; @@ -136,9 +216,7 @@ static int tce_iommu_enable(struct tce_container *container) int ret = 0; unsigned long locked; struct iommu_table_group *table_group; - - if (!container->grp) - return -ENXIO; + struct tce_iommu_group *tcegrp; if (!current->mm) return -ESRCH; /* process exited */ @@ -175,7 +253,12 @@ static int tce_iommu_enable(struct tce_container *container) * as there is no way to know how much we should increment * the locked_vm counter. 
*/ - table_group = iommu_group_get_iommudata(container->grp); + if (!tce_groups_attached(container)) + return -ENODEV; + + tcegrp = list_first_entry(&container->group_list, + struct tce_iommu_group, next); + table_group = iommu_group_get_iommudata(tcegrp->grp); if (!table_group) return -ENODEV; @@ -211,7 +294,7 @@ static void *tce_iommu_open(unsigned long arg) { struct tce_container *container; - if (arg != VFIO_SPAPR_TCE_IOMMU) { + if ((arg != VFIO_SPAPR_TCE_IOMMU) && (arg != VFIO_SPAPR_TCE_v2_IOMMU)) { pr_err("tce_vfio: Wrong IOMMU type\n"); return ERR_PTR(-EINVAL); } @@ -221,18 +304,45 @@ static void *tce_iommu_open(unsigned long arg) return ERR_PTR(-ENOMEM); mutex_init(&container->lock); + INIT_LIST_HEAD_RCU(&container->group_list); + + container->v2 = arg == VFIO_SPAPR_TCE_v2_IOMMU; return container; } +static int tce_iommu_clear(struct tce_container *container, + struct iommu_table *tbl, + unsigned long entry, unsigned long pages); +static void tce_iommu_free_table(struct iommu_table *tbl); + static void tce_iommu_release(void *iommu_data) { struct tce_container *container = iommu_data; + struct iommu_table_group *table_group; + struct tce_iommu_group *tcegrp; + long i; - WARN_ON(container->grp); + while (tce_groups_attached(container)) { + tcegrp = list_first_entry(&container->group_list, + struct tce_iommu_group, next); + table_group = iommu_group_get_iommudata(tcegrp->grp); + tce_iommu_detach_group(iommu_data, tcegrp->grp); + } - if (container->grp) - tce_iommu_detach_group(iommu_data, container->grp); + /* + * If VFIO created a table, it was not disposed + * by tce_iommu_detach_group() so do it now. + */ + for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) { + struct iommu_table *tbl = container->tables[i]; + + if (!tbl) + continue; + + tce_iommu_clear(container, tbl, tbl->it_offset, tbl->it_size); + tce_iommu_free_table(tbl); + } tce_iommu_disable(container); mutex_destroy(&container->lock); @@ -249,6 +359,47 @@ static void tce_iommu_unuse_page(struct tce_container *container, put_page(page); } +static int tce_iommu_prereg_ua_to_hpa(unsigned long tce, unsigned long size, + unsigned long *phpa, struct mm_iommu_table_group_mem_t **pmem) +{ + long ret = 0; + struct mm_iommu_table_group_mem_t *mem; + + mem = mm_iommu_lookup(tce, size); + if (!mem) + return -EINVAL; + + ret = mm_iommu_ua_to_hpa(mem, tce, phpa); + if (ret) + return -EINVAL; + + *pmem = mem; + + return 0; +} + +static void tce_iommu_unuse_page_v2(struct iommu_table *tbl, + unsigned long entry) +{ + struct mm_iommu_table_group_mem_t *mem = NULL; + int ret; + unsigned long hpa = 0; + unsigned long *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry); + + if (!pua || !current || !current->mm) + return; + + ret = tce_iommu_prereg_ua_to_hpa(*pua, IOMMU_PAGE_SIZE(tbl), + &hpa, &mem); + if (ret) + pr_debug("%s: tce %lx at #%lx was not cached, ret=%d\n", + __func__, *pua, entry, ret); + if (mem) + mm_iommu_mapped_dec(mem); + + *pua = 0; +} + static int tce_iommu_clear(struct tce_container *container, struct iommu_table *tbl, unsigned long entry, unsigned long pages) @@ -267,6 +418,11 @@ static int tce_iommu_clear(struct tce_container *container, if (direction == DMA_NONE) continue; + if (container->v2) { + tce_iommu_unuse_page_v2(tbl, entry); + continue; + } + tce_iommu_unuse_page(container, oldhpa); } @@ -333,6 +489,64 @@ static long tce_iommu_build(struct tce_container *container, return ret; } +static long tce_iommu_build_v2(struct tce_container *container, + struct iommu_table *tbl, + unsigned long entry, unsigned long tce, 
unsigned long pages, + enum dma_data_direction direction) +{ + long i, ret = 0; + struct page *page; + unsigned long hpa; + enum dma_data_direction dirtmp; + + for (i = 0; i < pages; ++i) { + struct mm_iommu_table_group_mem_t *mem = NULL; + unsigned long *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, + entry + i); + + ret = tce_iommu_prereg_ua_to_hpa(tce, IOMMU_PAGE_SIZE(tbl), + &hpa, &mem); + if (ret) + break; + + page = pfn_to_page(hpa >> PAGE_SHIFT); + if (!tce_page_is_contained(page, tbl->it_page_shift)) { + ret = -EPERM; + break; + } + + /* Preserve offset within IOMMU page */ + hpa |= tce & IOMMU_PAGE_MASK(tbl) & ~PAGE_MASK; + dirtmp = direction; + + /* The registered region is being unregistered */ + if (mm_iommu_mapped_inc(mem)) + break; + + ret = iommu_tce_xchg(tbl, entry + i, &hpa, &dirtmp); + if (ret) { + /* dirtmp cannot be DMA_NONE here */ + tce_iommu_unuse_page_v2(tbl, entry + i); + pr_err("iommu_tce: %s failed ioba=%lx, tce=%lx, ret=%ld\n", + __func__, entry << tbl->it_page_shift, + tce, ret); + break; + } + + if (dirtmp != DMA_NONE) + tce_iommu_unuse_page_v2(tbl, entry + i); + + *pua = tce; + + tce += IOMMU_PAGE_SIZE(tbl); + } + + if (ret) + tce_iommu_clear(container, tbl, entry, i); + + return ret; +} + static long tce_iommu_create_table(struct tce_container *container, struct iommu_table_group *table_group, int num, @@ -358,6 +572,12 @@ static long tce_iommu_create_table(struct tce_container *container, WARN_ON(!ret && !(*ptbl)->it_ops->free); WARN_ON(!ret && ((*ptbl)->it_allocated_size != table_size)); + if (!ret && container->v2) { + ret = tce_iommu_userspace_view_alloc(*ptbl); + if (ret) + (*ptbl)->it_ops->free(*ptbl); + } + if (ret) decrement_locked_vm(table_size >> PAGE_SHIFT); @@ -368,6 +588,7 @@ static void tce_iommu_free_table(struct iommu_table *tbl) { unsigned long pages = tbl->it_allocated_size >> PAGE_SHIFT; + tce_iommu_userspace_view_free(tbl); tbl->it_ops->free(tbl); decrement_locked_vm(pages); } @@ -383,6 +604,7 @@ static long tce_iommu_ioctl(void *iommu_data, case VFIO_CHECK_EXTENSION: switch (arg) { case VFIO_SPAPR_TCE_IOMMU: + case VFIO_SPAPR_TCE_v2_IOMMU: ret = 1; break; default: @@ -394,12 +616,15 @@ static long tce_iommu_ioctl(void *iommu_data, case VFIO_IOMMU_SPAPR_TCE_GET_INFO: { struct vfio_iommu_spapr_tce_info info; + struct tce_iommu_group *tcegrp; struct iommu_table_group *table_group; - if (WARN_ON(!container->grp)) + if (!tce_groups_attached(container)) return -ENXIO; - table_group = iommu_group_get_iommudata(container->grp); + tcegrp = list_first_entry(&container->group_list, + struct tce_iommu_group, next); + table_group = iommu_group_get_iommudata(tcegrp->grp); if (!table_group) return -ENXIO; @@ -468,11 +693,18 @@ static long tce_iommu_ioctl(void *iommu_data, if (ret) return ret; - ret = tce_iommu_build(container, tbl, - param.iova >> tbl->it_page_shift, - param.vaddr, - param.size >> tbl->it_page_shift, - direction); + if (container->v2) + ret = tce_iommu_build_v2(container, tbl, + param.iova >> tbl->it_page_shift, + param.vaddr, + param.size >> tbl->it_page_shift, + direction); + else + ret = tce_iommu_build(container, tbl, + param.iova >> tbl->it_page_shift, + param.vaddr, + param.size >> tbl->it_page_shift, + direction); iommu_flush_tce(tbl); @@ -518,7 +750,62 @@ static long tce_iommu_ioctl(void *iommu_data, return ret; } + case VFIO_IOMMU_SPAPR_REGISTER_MEMORY: { + struct vfio_iommu_spapr_register_memory param; + + if (!container->v2) + break; + + minsz = offsetofend(struct vfio_iommu_spapr_register_memory, + size); + + if 
(copy_from_user(¶m, (void __user *)arg, minsz)) + return -EFAULT; + + if (param.argsz < minsz) + return -EINVAL; + + /* No flag is supported now */ + if (param.flags) + return -EINVAL; + + mutex_lock(&container->lock); + ret = tce_iommu_register_pages(container, param.vaddr, + param.size); + mutex_unlock(&container->lock); + + return ret; + } + case VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY: { + struct vfio_iommu_spapr_register_memory param; + + if (!container->v2) + break; + + minsz = offsetofend(struct vfio_iommu_spapr_register_memory, + size); + + if (copy_from_user(¶m, (void __user *)arg, minsz)) + return -EFAULT; + + if (param.argsz < minsz) + return -EINVAL; + + /* No flag is supported now */ + if (param.flags) + return -EINVAL; + + mutex_lock(&container->lock); + ret = tce_iommu_unregister_pages(container, param.vaddr, + param.size); + mutex_unlock(&container->lock); + + return ret; + } case VFIO_IOMMU_ENABLE: + if (container->v2) + break; + mutex_lock(&container->lock); ret = tce_iommu_enable(container); mutex_unlock(&container->lock); @@ -526,16 +813,27 @@ static long tce_iommu_ioctl(void *iommu_data, case VFIO_IOMMU_DISABLE: + if (container->v2) + break; + mutex_lock(&container->lock); tce_iommu_disable(container); mutex_unlock(&container->lock); return 0; - case VFIO_EEH_PE_OP: - if (!container->grp) - return -ENODEV; - return vfio_spapr_iommu_eeh_ioctl(container->grp, - cmd, arg); + case VFIO_EEH_PE_OP: { + struct tce_iommu_group *tcegrp; + + ret = 0; + list_for_each_entry(tcegrp, &container->group_list, next) { + ret = vfio_spapr_iommu_eeh_ioctl(tcegrp->grp, + cmd, arg); + if (ret) + return ret; + } + return ret; + } + } return -ENOTTY; @@ -547,14 +845,17 @@ static void tce_iommu_release_ownership(struct tce_container *container, int i; for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) { - struct iommu_table *tbl = table_group->tables[i]; + struct iommu_table *tbl = container->tables[i]; if (!tbl) continue; tce_iommu_clear(container, tbl, tbl->it_offset, tbl->it_size); + tce_iommu_userspace_view_free(tbl); if (tbl->it_map) iommu_release_ownership(tbl); + + container->tables[i] = NULL; } } @@ -569,7 +870,10 @@ static int tce_iommu_take_ownership(struct tce_container *container, if (!tbl || !tbl->it_map) continue; - rc = iommu_take_ownership(tbl); + rc = tce_iommu_userspace_view_alloc(tbl); + if (!rc) + rc = iommu_take_ownership(tbl); + if (rc) { for (j = 0; j < i; ++j) iommu_release_ownership( @@ -579,6 +883,9 @@ static int tce_iommu_take_ownership(struct tce_container *container, } } + for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) + container->tables[i] = table_group->tables[i]; + return 0; } @@ -592,18 +899,8 @@ static void tce_iommu_release_ownership_ddw(struct tce_container *container, return; } - for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) { - /* Store table pointer as unset_window resets it */ - struct iommu_table *tbl = table_group->tables[i]; - - if (!tbl) - continue; - + for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) table_group->ops->unset_window(table_group, i); - tce_iommu_clear(container, tbl, - tbl->it_offset, tbl->it_size); - tce_iommu_free_table(tbl); - } table_group->ops->release_ownership(table_group); } @@ -611,7 +908,7 @@ static void tce_iommu_release_ownership_ddw(struct tce_container *container, static long tce_iommu_take_ownership_ddw(struct tce_container *container, struct iommu_table_group *table_group) { - long ret; + long i, ret = 0; struct iommu_table *tbl = NULL; if (!table_group->ops->create_table || !table_group->ops->set_window || @@ 
-622,23 +919,45 @@ static long tce_iommu_take_ownership_ddw(struct tce_container *container, table_group->ops->take_ownership(table_group); - ret = tce_iommu_create_table(container, - table_group, - 0, /* window number */ - IOMMU_PAGE_SHIFT_4K, - table_group->tce32_size, - 1, /* default levels */ - &tbl); - if (!ret) { - ret = table_group->ops->set_window(table_group, 0, tbl); + /* + * If it the first group attached, check if there is + * a default DMA window and create one if none as + * the userspace expects it to exist. + */ + if (!tce_groups_attached(container) && !container->tables[0]) { + ret = tce_iommu_create_table(container, + table_group, + 0, /* window number */ + IOMMU_PAGE_SHIFT_4K, + table_group->tce32_size, + 1, /* default levels */ + &tbl); if (ret) - tce_iommu_free_table(tbl); + goto release_exit; else - table_group->tables[0] = tbl; + container->tables[0] = tbl; } - if (ret) - table_group->ops->release_ownership(table_group); + /* Set all windows to the new group */ + for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) { + tbl = container->tables[i]; + + if (!tbl) + continue; + + /* Set the default window to a new group */ + ret = table_group->ops->set_window(table_group, i, tbl); + if (ret) + goto release_exit; + } + + return 0; + +release_exit: + for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) + table_group->ops->unset_window(table_group, i); + + table_group->ops->release_ownership(table_group); return ret; } @@ -649,29 +968,44 @@ static int tce_iommu_attach_group(void *iommu_data, int ret; struct tce_container *container = iommu_data; struct iommu_table_group *table_group; + struct tce_iommu_group *tcegrp = NULL; mutex_lock(&container->lock); /* pr_debug("tce_vfio: Attaching group #%u to iommu %p\n", iommu_group_id(iommu_group), iommu_group); */ - if (container->grp) { - pr_warn("tce_vfio: Only one group per IOMMU container is allowed, existing id=%d, attaching id=%d\n", - iommu_group_id(container->grp), - iommu_group_id(iommu_group)); + table_group = iommu_group_get_iommudata(iommu_group); + + if (tce_groups_attached(container) && (!table_group->ops || + !table_group->ops->take_ownership || + !table_group->ops->release_ownership)) { ret = -EBUSY; goto unlock_exit; } - if (container->enabled) { - pr_err("tce_vfio: attaching group #%u to enabled container\n", - iommu_group_id(iommu_group)); - ret = -EBUSY; - goto unlock_exit; + /* Check if new group has the same iommu_ops (i.e. 
compatible) */ + list_for_each_entry(tcegrp, &container->group_list, next) { + struct iommu_table_group *table_group_tmp; + + if (tcegrp->grp == iommu_group) { + pr_warn("tce_vfio: Group %d is already attached\n", + iommu_group_id(iommu_group)); + ret = -EBUSY; + goto unlock_exit; + } + table_group_tmp = iommu_group_get_iommudata(tcegrp->grp); + if (table_group_tmp->ops != table_group->ops) { + pr_warn("tce_vfio: Group %d is incompatible with group %d\n", + iommu_group_id(iommu_group), + iommu_group_id(tcegrp->grp)); + ret = -EPERM; + goto unlock_exit; + } } - table_group = iommu_group_get_iommudata(iommu_group); - if (!table_group) { - ret = -ENXIO; + tcegrp = kzalloc(sizeof(*tcegrp), GFP_KERNEL); + if (!tcegrp) { + ret = -ENOMEM; goto unlock_exit; } @@ -681,10 +1015,15 @@ static int tce_iommu_attach_group(void *iommu_data, else ret = tce_iommu_take_ownership_ddw(container, table_group); - if (!ret) - container->grp = iommu_group; + if (!ret) { + tcegrp->grp = iommu_group; + list_add(&tcegrp->next, &container->group_list); + } unlock_exit: + if (ret && tcegrp) + kfree(tcegrp); + mutex_unlock(&container->lock); return ret; @@ -695,24 +1034,26 @@ static void tce_iommu_detach_group(void *iommu_data, { struct tce_container *container = iommu_data; struct iommu_table_group *table_group; + bool found = false; + struct tce_iommu_group *tcegrp; mutex_lock(&container->lock); - if (iommu_group != container->grp) { - pr_warn("tce_vfio: detaching group #%u, expected group is #%u\n", - iommu_group_id(iommu_group), - iommu_group_id(container->grp)); - goto unlock_exit; + + list_for_each_entry(tcegrp, &container->group_list, next) { + if (tcegrp->grp == iommu_group) { + found = true; + break; + } } - if (container->enabled) { - pr_warn("tce_vfio: detaching group #%u from enabled container, forcing disable\n", - iommu_group_id(container->grp)); - tce_iommu_disable(container); + if (!found) { + pr_warn("tce_vfio: detaching unattached group #%u\n", + iommu_group_id(iommu_group)); + goto unlock_exit; } - /* pr_debug("tce_vfio: detaching group #%u from iommu %p\n", - iommu_group_id(iommu_group), iommu_group); */ - container->grp = NULL; + list_del(&tcegrp->next); + kfree(tcegrp); table_group = iommu_group_get_iommudata(iommu_group); BUG_ON(!table_group); diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index e4fa1995f613..fa84391a0d00 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -36,6 +36,8 @@ /* Two-stage IOMMU */ #define VFIO_TYPE1_NESTING_IOMMU 6 /* Implies v2 */ +#define VFIO_SPAPR_TCE_v2_IOMMU 7 + /* * The IOCTL interface is designed for extensibility by embedding the * structure length (argsz) and flags into structures passed between @@ -507,6 +509,31 @@ struct vfio_eeh_pe_op { #define VFIO_EEH_PE_OP _IO(VFIO_TYPE, VFIO_BASE + 21) +/** + * VFIO_IOMMU_SPAPR_REGISTER_MEMORY - _IOW(VFIO_TYPE, VFIO_BASE + 17, struct vfio_iommu_spapr_register_memory) + * + * Registers user space memory where DMA is allowed. It pins + * user pages and does the locked memory accounting so + * subsequent VFIO_IOMMU_MAP_DMA/VFIO_IOMMU_UNMAP_DMA calls + * get faster. 
+ */ +struct vfio_iommu_spapr_register_memory { + __u32 argsz; + __u32 flags; + __u64 vaddr; /* Process virtual address */ + __u64 size; /* Size of mapping (bytes) */ +}; +#define VFIO_IOMMU_SPAPR_REGISTER_MEMORY _IO(VFIO_TYPE, VFIO_BASE + 17) + +/** + * VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY - _IOW(VFIO_TYPE, VFIO_BASE + 18, struct vfio_iommu_spapr_register_memory) + * + * Unregisters user space memory registered with + * VFIO_IOMMU_SPAPR_REGISTER_MEMORY. + * Uses vfio_iommu_spapr_register_memory for parameters. + */ +#define VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY _IO(VFIO_TYPE, VFIO_BASE + 18) + /* ***************************************************************** */ #endif /* _UAPIVFIO_H */ -- cgit v1.2.3 From e633bc86a922468a82300eef5b9802e17be5e23d Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Fri, 5 Jun 2015 16:35:26 +1000 Subject: vfio: powerpc/spapr: Support Dynamic DMA windows This adds create/remove window ioctls to create and remove DMA windows. sPAPR defines a Dynamic DMA windows capability which allows para-virtualized guests to create additional DMA windows on a PCI bus. The existing linux kernels use this new window to map the entire guest memory and switch to the direct DMA operations saving time on map/unmap requests which would normally happen in a big amounts. This adds 2 ioctl handlers - VFIO_IOMMU_SPAPR_TCE_CREATE and VFIO_IOMMU_SPAPR_TCE_REMOVE - to create and remove windows. Up to 2 windows are supported now by the hardware and by this driver. This changes VFIO_IOMMU_SPAPR_TCE_GET_INFO handler to return additional information such as a number of supported windows and maximum number levels of TCE tables. DDW is added as a capability, not as a SPAPR TCE IOMMU v2 unique feature as we still want to support v2 on platforms which cannot do DDW for the sake of TCE acceleration in KVM (coming soon). Signed-off-by: Alexey Kardashevskiy [aw: for the vfio related changes] Acked-by: Alex Williamson Reviewed-by: David Gibson Signed-off-by: Michael Ellerman --- Documentation/vfio.txt | 19 ++++ arch/powerpc/include/asm/iommu.h | 2 +- drivers/vfio/vfio_iommu_spapr_tce.c | 196 +++++++++++++++++++++++++++++++++++- include/uapi/linux/vfio.h | 61 ++++++++++- 4 files changed, 273 insertions(+), 5 deletions(-) (limited to 'include/uapi/linux') diff --git a/Documentation/vfio.txt b/Documentation/vfio.txt index dcc37e109c68..1dd3fddfd3a1 100644 --- a/Documentation/vfio.txt +++ b/Documentation/vfio.txt @@ -464,6 +464,25 @@ address is from pre-registered range. This separation helps in optimizing DMA for guests. +6) sPAPR specification allows guests to have an additional DMA window(s) on +a PCI bus with a variable page size. Two ioctls have been added to support +this: VFIO_IOMMU_SPAPR_TCE_CREATE and VFIO_IOMMU_SPAPR_TCE_REMOVE. +The platform has to support the functionality or error will be returned to +the userspace. The existing hardware supports up to 2 DMA windows, one is +2GB long, uses 4K pages and called "default 32bit window"; the other can +be as big as entire RAM, use different page size, it is optional - guests +create those in run-time if the guest driver supports 64bit DMA. + +VFIO_IOMMU_SPAPR_TCE_CREATE receives a page shift, a DMA window size and +a number of TCE table levels (if a TCE table is going to be big enough and +the kernel may not be able to allocate enough of physically contiguous memory). +It creates a new window in the available slot and returns the bus address where +the new window starts. 
Due to hardware limitation, the user space cannot choose +the location of DMA windows. + +VFIO_IOMMU_SPAPR_TCE_REMOVE receives the bus start address of the window +and removes it. + ------------------------------------------------------------------------------- [1] VFIO was originally an acronym for "Virtual Function I/O" in its diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h index f9957eb4c659..ca18cff90900 100644 --- a/arch/powerpc/include/asm/iommu.h +++ b/arch/powerpc/include/asm/iommu.h @@ -149,7 +149,7 @@ extern void iommu_free_table(struct iommu_table *tbl, const char *node_name); */ extern struct iommu_table *iommu_init_table(struct iommu_table * tbl, int nid); -#define IOMMU_TABLE_GROUP_MAX_TABLES 1 +#define IOMMU_TABLE_GROUP_MAX_TABLES 2 struct iommu_table_group; diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c index 91a32239bd0a..0582b72ef377 100644 --- a/drivers/vfio/vfio_iommu_spapr_tce.c +++ b/drivers/vfio/vfio_iommu_spapr_tce.c @@ -211,6 +211,18 @@ static long tce_iommu_find_table(struct tce_container *container, return -1; } +static int tce_iommu_find_free_table(struct tce_container *container) +{ + int i; + + for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) { + if (!container->tables[i]) + return i; + } + + return -ENOSPC; +} + static int tce_iommu_enable(struct tce_container *container) { int ret = 0; @@ -593,11 +605,115 @@ static void tce_iommu_free_table(struct iommu_table *tbl) decrement_locked_vm(pages); } +static long tce_iommu_create_window(struct tce_container *container, + __u32 page_shift, __u64 window_size, __u32 levels, + __u64 *start_addr) +{ + struct tce_iommu_group *tcegrp; + struct iommu_table_group *table_group; + struct iommu_table *tbl = NULL; + long ret, num; + + num = tce_iommu_find_free_table(container); + if (num < 0) + return num; + + /* Get the first group for ops::create_table */ + tcegrp = list_first_entry(&container->group_list, + struct tce_iommu_group, next); + table_group = iommu_group_get_iommudata(tcegrp->grp); + if (!table_group) + return -EFAULT; + + if (!(table_group->pgsizes & (1ULL << page_shift))) + return -EINVAL; + + if (!table_group->ops->set_window || !table_group->ops->unset_window || + !table_group->ops->get_table_size || + !table_group->ops->create_table) + return -EPERM; + + /* Create TCE table */ + ret = tce_iommu_create_table(container, table_group, num, + page_shift, window_size, levels, &tbl); + if (ret) + return ret; + + BUG_ON(!tbl->it_ops->free); + + /* + * Program the table to every group. + * Groups have been tested for compatibility at the attach time. 
+ */ + list_for_each_entry(tcegrp, &container->group_list, next) { + table_group = iommu_group_get_iommudata(tcegrp->grp); + + ret = table_group->ops->set_window(table_group, num, tbl); + if (ret) + goto unset_exit; + } + + container->tables[num] = tbl; + + /* Return start address assigned by platform in create_table() */ + *start_addr = tbl->it_offset << tbl->it_page_shift; + + return 0; + +unset_exit: + list_for_each_entry(tcegrp, &container->group_list, next) { + table_group = iommu_group_get_iommudata(tcegrp->grp); + table_group->ops->unset_window(table_group, num); + } + tce_iommu_free_table(tbl); + + return ret; +} + +static long tce_iommu_remove_window(struct tce_container *container, + __u64 start_addr) +{ + struct iommu_table_group *table_group = NULL; + struct iommu_table *tbl; + struct tce_iommu_group *tcegrp; + int num; + + num = tce_iommu_find_table(container, start_addr, &tbl); + if (num < 0) + return -EINVAL; + + BUG_ON(!tbl->it_size); + + /* Detach groups from IOMMUs */ + list_for_each_entry(tcegrp, &container->group_list, next) { + table_group = iommu_group_get_iommudata(tcegrp->grp); + + /* + * SPAPR TCE IOMMU exposes the default DMA window to + * the guest via dma32_window_start/size of + * VFIO_IOMMU_SPAPR_TCE_GET_INFO. Some platforms allow + * the userspace to remove this window, some do not so + * here we check for the platform capability. + */ + if (!table_group->ops || !table_group->ops->unset_window) + return -EPERM; + + table_group->ops->unset_window(table_group, num); + } + + /* Free table */ + tce_iommu_clear(container, tbl, tbl->it_offset, tbl->it_size); + tce_iommu_free_table(tbl); + container->tables[num] = NULL; + + return 0; +} + static long tce_iommu_ioctl(void *iommu_data, unsigned int cmd, unsigned long arg) { struct tce_container *container = iommu_data; - unsigned long minsz; + unsigned long minsz, ddwsz; long ret; switch (cmd) { @@ -641,6 +757,21 @@ static long tce_iommu_ioctl(void *iommu_data, info.dma32_window_start = table_group->tce32_start; info.dma32_window_size = table_group->tce32_size; info.flags = 0; + memset(&info.ddw, 0, sizeof(info.ddw)); + + if (table_group->max_dynamic_windows_supported && + container->v2) { + info.flags |= VFIO_IOMMU_SPAPR_INFO_DDW; + info.ddw.pgsizes = table_group->pgsizes; + info.ddw.max_dynamic_windows_supported = + table_group->max_dynamic_windows_supported; + info.ddw.levels = table_group->max_levels; + } + + ddwsz = offsetofend(struct vfio_iommu_spapr_tce_info, ddw); + + if (info.argsz >= ddwsz) + minsz = ddwsz; if (copy_to_user((void __user *)arg, &info, minsz)) return -EFAULT; @@ -834,6 +965,69 @@ static long tce_iommu_ioctl(void *iommu_data, return ret; } + case VFIO_IOMMU_SPAPR_TCE_CREATE: { + struct vfio_iommu_spapr_tce_create create; + + if (!container->v2) + break; + + if (!tce_groups_attached(container)) + return -ENXIO; + + minsz = offsetofend(struct vfio_iommu_spapr_tce_create, + start_addr); + + if (copy_from_user(&create, (void __user *)arg, minsz)) + return -EFAULT; + + if (create.argsz < minsz) + return -EINVAL; + + if (create.flags) + return -EINVAL; + + mutex_lock(&container->lock); + + ret = tce_iommu_create_window(container, create.page_shift, + create.window_size, create.levels, + &create.start_addr); + + mutex_unlock(&container->lock); + + if (!ret && copy_to_user((void __user *)arg, &create, minsz)) + ret = -EFAULT; + + return ret; + } + case VFIO_IOMMU_SPAPR_TCE_REMOVE: { + struct vfio_iommu_spapr_tce_remove remove; + + if (!container->v2) + break; + + if (!tce_groups_attached(container)) + 
return -ENXIO; + + minsz = offsetofend(struct vfio_iommu_spapr_tce_remove, + start_addr); + + if (copy_from_user(&remove, (void __user *)arg, minsz)) + return -EFAULT; + + if (remove.argsz < minsz) + return -EINVAL; + + if (remove.flags) + return -EINVAL; + + mutex_lock(&container->lock); + + ret = tce_iommu_remove_window(container, remove.start_addr); + + mutex_unlock(&container->lock); + + return ret; + } } return -ENOTTY; diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index fa84391a0d00..9fd7b5d8df2f 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -444,6 +444,23 @@ struct vfio_iommu_type1_dma_unmap { /* -------- Additional API for SPAPR TCE (Server POWERPC) IOMMU -------- */ +/* + * The SPAPR TCE DDW info struct provides the information about + * the details of Dynamic DMA window capability. + * + * @pgsizes contains a page size bitmask, 4K/64K/16M are supported. + * @max_dynamic_windows_supported tells the maximum number of windows + * which the platform can create. + * @levels tells the maximum number of levels in multi-level IOMMU tables; + * this allows splitting a table into smaller chunks which reduces + * the amount of physically contiguous memory required for the table. + */ +struct vfio_iommu_spapr_tce_ddw_info { + __u64 pgsizes; /* Bitmap of supported page sizes */ + __u32 max_dynamic_windows_supported; + __u32 levels; +}; + /* * The SPAPR TCE info struct provides the information about the PCI bus * address ranges available for DMA, these values are programmed into @@ -454,14 +471,17 @@ struct vfio_iommu_type1_dma_unmap { * addresses too so the window works as a filter rather than an offset * for IOVA addresses. * - * A flag will need to be added if other page sizes are supported, - * so as defined here, it is always 4k. + * Flags supported: + * - VFIO_IOMMU_SPAPR_INFO_DDW: informs the userspace that dynamic DMA windows + * (DDW) support is present. @ddw is only supported when DDW is present. */ struct vfio_iommu_spapr_tce_info { __u32 argsz; - __u32 flags; /* reserved for future use */ + __u32 flags; +#define VFIO_IOMMU_SPAPR_INFO_DDW (1 << 0) /* DDW supported */ __u32 dma32_window_start; /* 32 bit window start (bytes) */ __u32 dma32_window_size; /* 32 bit window size (bytes) */ + struct vfio_iommu_spapr_tce_ddw_info ddw; }; #define VFIO_IOMMU_SPAPR_TCE_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 12) @@ -534,6 +554,41 @@ struct vfio_iommu_spapr_register_memory { */ #define VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY _IO(VFIO_TYPE, VFIO_BASE + 18) +/** + * VFIO_IOMMU_SPAPR_TCE_CREATE - _IOWR(VFIO_TYPE, VFIO_BASE + 19, struct vfio_iommu_spapr_tce_create) + * + * Creates an additional TCE table and programs it (sets a new DMA window) + * to every IOMMU group in the container. It receives page shift, window + * size and number of levels in the TCE table being created. + * + * It allocates and returns an offset on a PCI bus of the new DMA window. + */ +struct vfio_iommu_spapr_tce_create { + __u32 argsz; + __u32 flags; + /* in */ + __u32 page_shift; + __u64 window_size; + __u32 levels; + /* out */ + __u64 start_addr; +}; +#define VFIO_IOMMU_SPAPR_TCE_CREATE _IO(VFIO_TYPE, VFIO_BASE + 19) + +/** + * VFIO_IOMMU_SPAPR_TCE_REMOVE - _IOW(VFIO_TYPE, VFIO_BASE + 20, struct vfio_iommu_spapr_tce_remove) + * + * Unprograms a TCE table from all groups in the container and destroys it. + * It receives a PCI bus offset as a window id. 
+ */ +struct vfio_iommu_spapr_tce_remove { + __u32 argsz; + __u32 flags; + /* in */ + __u64 start_addr; +}; +#define VFIO_IOMMU_SPAPR_TCE_REMOVE _IO(VFIO_TYPE, VFIO_BASE + 20) + /* ***************************************************************** */ #endif /* _UAPIVFIO_H */ -- cgit v1.2.3 From a4244b0cf58d56c171874e85228ba5deffeb017a Mon Sep 17 00:00:00 2001 From: Hadar Hen Zion Date: Thu, 11 Jun 2015 10:28:16 +0300 Subject: net/ethtool: Add current supported tunable options Add strings array of the current supported tunable options. Signed-off-by: Hadar Hen Zion Reviewed-by: Amir Vadai Signed-off-by: David S. Miller --- include/uapi/linux/ethtool.h | 6 ++++++ net/core/ethtool.c | 12 ++++++++++++ 2 files changed, 18 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index 0594933cdf55..cd67aec187d9 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -215,6 +215,11 @@ enum tunable_id { ETHTOOL_ID_UNSPEC, ETHTOOL_RX_COPYBREAK, ETHTOOL_TX_COPYBREAK, + /* + * Add your fresh new tubale attribute above and remember to update + * tunable_strings[] in net/core/ethtool.c + */ + __ETHTOOL_TUNABLE_COUNT, }; enum tunable_type_id { @@ -545,6 +550,7 @@ enum ethtool_stringset { ETH_SS_NTUPLE_FILTERS, ETH_SS_FEATURES, ETH_SS_RSS_HASH_FUNCS, + ETH_SS_TUNABLES, }; /** diff --git a/net/core/ethtool.c b/net/core/ethtool.c index eb0c3ace7458..b495ab1797fa 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -106,6 +106,13 @@ rss_hash_func_strings[ETH_RSS_HASH_FUNCS_COUNT][ETH_GSTRING_LEN] = { [ETH_RSS_HASH_XOR_BIT] = "xor", }; +static const char +tunable_strings[__ETHTOOL_TUNABLE_COUNT][ETH_GSTRING_LEN] = { + [ETHTOOL_ID_UNSPEC] = "Unspec", + [ETHTOOL_RX_COPYBREAK] = "rx-copybreak", + [ETHTOOL_TX_COPYBREAK] = "tx-copybreak", +}; + static int ethtool_get_features(struct net_device *dev, void __user *useraddr) { struct ethtool_gfeatures cmd = { @@ -194,6 +201,9 @@ static int __ethtool_get_sset_count(struct net_device *dev, int sset) if (sset == ETH_SS_RSS_HASH_FUNCS) return ARRAY_SIZE(rss_hash_func_strings); + if (sset == ETH_SS_TUNABLES) + return ARRAY_SIZE(tunable_strings); + if (ops->get_sset_count && ops->get_strings) return ops->get_sset_count(dev, sset); else @@ -211,6 +221,8 @@ static void __ethtool_get_strings(struct net_device *dev, else if (stringset == ETH_SS_RSS_HASH_FUNCS) memcpy(data, rss_hash_func_strings, sizeof(rss_hash_func_strings)); + else if (stringset == ETH_SS_TUNABLES) + memcpy(data, tunable_strings, sizeof(tunable_strings)); else /* ops->get_strings is valid because checked earlier */ ops->get_strings(dev, stringset, data); -- cgit v1.2.3 From 9961127d4bce6325e9a0b0fb105e0c85a6c62cb7 Mon Sep 17 00:00:00 2001 From: Vincent Cuissard Date: Thu, 11 Jun 2015 11:25:47 +0200 Subject: NFC: nci: add generic uart support Some NFC controller supports UART as host interface. As with SPI, a lot of code can be shared between vendor drivers. This patch add the generic support of UART and provides some extension API for vendor specific needs. This code is strongly inspired by the Bluetooth HCI ldisc implementation. NCI UART vendor drivers will have to register themselves to this layer via nci_uart_register. Underlying tty will have to be configured from user land thanks to an ioctl. 
Signed-off-by: Vincent Cuissard Signed-off-by: Samuel Ortiz --- include/net/nfc/nci.h | 1 + include/net/nfc/nci_core.h | 47 +++++ include/uapi/linux/tty.h | 1 + net/nfc/nci/Kconfig | 7 + net/nfc/nci/Makefile | 3 + net/nfc/nci/uart.c | 495 +++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 554 insertions(+) create mode 100644 net/nfc/nci/uart.c (limited to 'include/uapi/linux') diff --git a/include/net/nfc/nci.h b/include/net/nfc/nci.h index a2f2f3d3196d..75d2e1880059 100644 --- a/include/net/nfc/nci.h +++ b/include/net/nfc/nci.h @@ -35,6 +35,7 @@ #define NCI_MAX_NUM_RF_CONFIGS 10 #define NCI_MAX_NUM_CONN 10 #define NCI_MAX_PARAM_LEN 251 +#define NCI_MAX_PACKET_SIZE 258 /* NCI Status Codes */ #define NCI_STATUS_OK 0x00 diff --git a/include/net/nfc/nci_core.h b/include/net/nfc/nci_core.h index 9d77ed556b78..01fc8c531115 100644 --- a/include/net/nfc/nci_core.h +++ b/include/net/nfc/nci_core.h @@ -31,6 +31,7 @@ #include #include +#include #include #include @@ -391,4 +392,50 @@ int nci_spi_send(struct nci_spi *nspi, struct sk_buff *skb); struct sk_buff *nci_spi_read(struct nci_spi *nspi); +/* ----- NCI UART ---- */ + +/* Ioctl */ +#define NCIUARTSETDRIVER _IOW('U', 0, char *) + +enum nci_uart_driver { + NCI_UART_DRIVER_MARVELL = 0, + NCI_UART_DRIVER_MAX +}; + +struct nci_uart; + +struct nci_uart_ops { + int (*open)(struct nci_uart *nci_uart); + void (*close)(struct nci_uart *nci_uart); + int (*recv)(struct nci_uart *nci_uart, struct sk_buff *skb); + int (*recv_buf)(struct nci_uart *nci_uart, const u8 *data, char *flags, + int count); + int (*send)(struct nci_uart *nci_uart, struct sk_buff *skb); + void (*tx_start)(struct nci_uart *nci_uart); + void (*tx_done)(struct nci_uart *nci_uart); +}; + +struct nci_uart { + struct module *owner; + struct nci_uart_ops ops; + const char *name; + enum nci_uart_driver driver; + + /* Dynamic data */ + struct nci_dev *ndev; + spinlock_t rx_lock; + struct work_struct write_work; + struct tty_struct *tty; + unsigned long tx_state; + struct sk_buff_head tx_q; + struct sk_buff *tx_skb; + struct sk_buff *rx_skb; + int rx_packet_len; + void *drv_data; +}; + +int nci_uart_register(struct nci_uart *nu); +void nci_uart_unregister(struct nci_uart *nu); +void nci_uart_set_config(struct nci_uart *nu, int baudrate, int flow_ctrl); + #endif /* __NCI_CORE_H */ diff --git a/include/uapi/linux/tty.h b/include/uapi/linux/tty.h index dac199a2dba5..01c4410352ff 100644 --- a/include/uapi/linux/tty.h +++ b/include/uapi/linux/tty.h @@ -34,5 +34,6 @@ #define N_TI_WL 22 /* for TI's WL BT, FM, GPS combo chips */ #define N_TRACESINK 23 /* Trace data routing for MIPI P1149.7 */ #define N_TRACEROUTER 24 /* Trace data routing for MIPI P1149.7 */ +#define N_NCI 25 /* NFC NCI UART */ #endif /* _UAPI_LINUX_TTY_H */ diff --git a/net/nfc/nci/Kconfig b/net/nfc/nci/Kconfig index a4f1e42e3481..901c1ddba841 100644 --- a/net/nfc/nci/Kconfig +++ b/net/nfc/nci/Kconfig @@ -19,3 +19,10 @@ config NFC_NCI_SPI an NFC Controller (NFCC) and a Device Host (DH). Say yes if you use an NCI driver that requires SPI link layer. + +config NFC_NCI_UART + depends on NFC_NCI && TTY + tristate "NCI over UART protocol support" + default n + help + Say yes if you use an NCI driver that requires UART link layer. 
diff --git a/net/nfc/nci/Makefile b/net/nfc/nci/Makefile index 7ed8949266cc..b4b85b82e988 100644 --- a/net/nfc/nci/Makefile +++ b/net/nfc/nci/Makefile @@ -7,3 +7,6 @@ obj-$(CONFIG_NFC_NCI) += nci.o nci-objs := core.o data.o lib.o ntf.o rsp.o hci.o nci-$(CONFIG_NFC_NCI_SPI) += spi.o + +nci_uart-y += uart.o +obj-$(CONFIG_NFC_NCI_UART) += nci_uart.o diff --git a/net/nfc/nci/uart.c b/net/nfc/nci/uart.c new file mode 100644 index 000000000000..70c279543074 --- /dev/null +++ b/net/nfc/nci/uart.c @@ -0,0 +1,495 @@ +/* + * Copyright (C) 2015, Marvell International Ltd. + * + * This software file (the "File") is distributed by Marvell International + * Ltd. under the terms of the GNU General Public License Version 2, June 1991 + * (the "License"). You may use, redistribute and/or modify this File in + * accordance with the terms and conditions of the License, a copy of which + * is available on the worldwide web at + * http://www.gnu.org/licenses/old-licenses/gpl-2.0.txt. + * + * THE FILE IS DISTRIBUTED AS-IS, WITHOUT WARRANTY OF ANY KIND, AND THE + * IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE + * ARE EXPRESSLY DISCLAIMED. The License provides additional details about + * this warranty disclaimer. + + */ + +/* Inspired (hugely) by HCI LDISC implementation in Bluetooth. + * + * Copyright (C) 2000-2001 Qualcomm Incorporated + * Copyright (C) 2002-2003 Maxim Krasnyansky + * Copyright (C) 2004-2005 Marcel Holtmann + */ + +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +/* TX states */ +#define NCI_UART_SENDING 1 +#define NCI_UART_TX_WAKEUP 2 + +static struct nci_uart *nci_uart_drivers[NCI_UART_DRIVER_MAX]; + +static inline struct sk_buff *nci_uart_dequeue(struct nci_uart *nu) +{ + struct sk_buff *skb = nu->tx_skb; + + if (!skb) + skb = skb_dequeue(&nu->tx_q); + else + nu->tx_skb = NULL; + + return skb; +} + +static inline int nci_uart_queue_empty(struct nci_uart *nu) +{ + if (nu->tx_skb) + return 0; + + return skb_queue_empty(&nu->tx_q); +} + +static int nci_uart_tx_wakeup(struct nci_uart *nu) +{ + if (test_and_set_bit(NCI_UART_SENDING, &nu->tx_state)) { + set_bit(NCI_UART_TX_WAKEUP, &nu->tx_state); + return 0; + } + + schedule_work(&nu->write_work); + + return 0; +} + +static void nci_uart_write_work(struct work_struct *work) +{ + struct nci_uart *nu = container_of(work, struct nci_uart, write_work); + struct tty_struct *tty = nu->tty; + struct sk_buff *skb; + +restart: + clear_bit(NCI_UART_TX_WAKEUP, &nu->tx_state); + + if (nu->ops.tx_start) + nu->ops.tx_start(nu); + + while ((skb = nci_uart_dequeue(nu))) { + int len; + + set_bit(TTY_DO_WRITE_WAKEUP, &tty->flags); + len = tty->ops->write(tty, skb->data, skb->len); + skb_pull(skb, len); + if (skb->len) { + nu->tx_skb = skb; + break; + } + kfree_skb(skb); + } + + if (test_bit(NCI_UART_TX_WAKEUP, &nu->tx_state)) + goto restart; + + if (nu->ops.tx_done && nci_uart_queue_empty(nu)) + nu->ops.tx_done(nu); + + clear_bit(NCI_UART_SENDING, &nu->tx_state); +} + +static int nci_uart_set_driver(struct tty_struct *tty, unsigned int driver) +{ + struct nci_uart *nu = NULL; + int ret; + + if (driver >= NCI_UART_DRIVER_MAX) + return -EINVAL; + + if (!nci_uart_drivers[driver]) + return -ENOENT; + + nu = kzalloc(sizeof(*nu), GFP_KERNEL); + if (!nu) + return -ENOMEM; + + memcpy(nu, nci_uart_drivers[driver], sizeof(struct nci_uart)); + nu->tty = tty; + tty->disc_data = nu; + skb_queue_head_init(&nu->tx_q); + 
INIT_WORK(&nu->write_work, nci_uart_write_work); + spin_lock_init(&nu->rx_lock); + + ret = nu->ops.open(nu); + if (ret) { + tty->disc_data = NULL; + kfree(nu); + } else if (!try_module_get(nu->owner)) { + nu->ops.close(nu); + tty->disc_data = NULL; + kfree(nu); + return -ENOENT; + } + return ret; +} + +/* ------ LDISC part ------ */ + +/* nci_uart_tty_open + * + * Called when line discipline changed to NCI_UART. + * + * Arguments: + * tty pointer to tty info structure + * Return Value: + * 0 if success, otherwise error code + */ +static int nci_uart_tty_open(struct tty_struct *tty) +{ + /* Error if the tty has no write op instead of leaving an exploitable + * hole + */ + if (!tty->ops->write) + return -EOPNOTSUPP; + + tty->disc_data = NULL; + tty->receive_room = 65536; + + /* Flush any pending characters in the driver and line discipline. */ + + /* FIXME: why is this needed. Note don't use ldisc_ref here as the + * open path is before the ldisc is referencable. + */ + + if (tty->ldisc->ops->flush_buffer) + tty->ldisc->ops->flush_buffer(tty); + tty_driver_flush_buffer(tty); + + return 0; +} + +/* nci_uart_tty_close() + * + * Called when the line discipline is changed to something + * else, the tty is closed, or the tty detects a hangup. + */ +static void nci_uart_tty_close(struct tty_struct *tty) +{ + struct nci_uart *nu = (void *)tty->disc_data; + + /* Detach from the tty */ + tty->disc_data = NULL; + + if (!nu) + return; + + if (nu->tx_skb) + kfree_skb(nu->tx_skb); + if (nu->rx_skb) + kfree_skb(nu->rx_skb); + + skb_queue_purge(&nu->tx_q); + + nu->ops.close(nu); + nu->tty = NULL; + module_put(nu->owner); + + cancel_work_sync(&nu->write_work); + + kfree(nu); +} + +/* nci_uart_tty_wakeup() + * + * Callback for transmit wakeup. Called when low level + * device driver can accept more send data. + * + * Arguments: tty pointer to associated tty instance data + * Return Value: None + */ +static void nci_uart_tty_wakeup(struct tty_struct *tty) +{ + struct nci_uart *nu = (void *)tty->disc_data; + + if (!nu) + return; + + clear_bit(TTY_DO_WRITE_WAKEUP, &tty->flags); + + if (tty != nu->tty) + return; + + nci_uart_tx_wakeup(nu); +} + +/* nci_uart_tty_receive() + * + * Called by tty low level driver when receive data is + * available. + * + * Arguments: tty pointer to tty isntance data + * data pointer to received data + * flags pointer to flags for data + * count count of received data in bytes + * + * Return Value: None + */ +static void nci_uart_tty_receive(struct tty_struct *tty, const u8 *data, + char *flags, int count) +{ + struct nci_uart *nu = (void *)tty->disc_data; + + if (!nu || tty != nu->tty) + return; + + spin_lock(&nu->rx_lock); + nu->ops.recv_buf(nu, (void *)data, flags, count); + spin_unlock(&nu->rx_lock); + + tty_unthrottle(tty); +} + +/* nci_uart_tty_ioctl() + * + * Process IOCTL system call for the tty device. 
+ * + * Arguments: + * + * tty pointer to tty instance data + * file pointer to open file object for device + * cmd IOCTL command code + * arg argument for IOCTL call (cmd dependent) + * + * Return Value: Command dependent + */ +static int nci_uart_tty_ioctl(struct tty_struct *tty, struct file *file, + unsigned int cmd, unsigned long arg) +{ + struct nci_uart *nu = (void *)tty->disc_data; + int err = 0; + + switch (cmd) { + case NCIUARTSETDRIVER: + if (!nu) + return nci_uart_set_driver(tty, (unsigned int)arg); + else + return -EBUSY; + break; + default: + err = n_tty_ioctl_helper(tty, file, cmd, arg); + break; + } + + return err; +} + +/* We don't provide read/write/poll interface for user space. */ +static ssize_t nci_uart_tty_read(struct tty_struct *tty, struct file *file, + unsigned char __user *buf, size_t nr) +{ + return 0; +} + +static ssize_t nci_uart_tty_write(struct tty_struct *tty, struct file *file, + const unsigned char *data, size_t count) +{ + return 0; +} + +static unsigned int nci_uart_tty_poll(struct tty_struct *tty, + struct file *filp, poll_table *wait) +{ + return 0; +} + +static int nci_uart_send(struct nci_uart *nu, struct sk_buff *skb) +{ + /* Queue TX packet */ + skb_queue_tail(&nu->tx_q, skb); + + /* Try to start TX (if possible) */ + nci_uart_tx_wakeup(nu); + + return 0; +} + +/* -- Default recv_buf handler -- + * + * This handler supposes that NCI frames are sent over UART link without any + * framing. It reads NCI header, retrieve the packet size and once all packet + * bytes are received it passes it to nci_uart driver for processing. + */ +static int nci_uart_default_recv_buf(struct nci_uart *nu, const u8 *data, + char *flags, int count) +{ + int chunk_len; + + if (!nu->ndev) { + nfc_err(nu->tty->dev, + "receive data from tty but no NCI dev is attached yet, drop buffer\n"); + return 0; + } + + /* Decode all incoming data in packets + * and enqueue then for processing. + */ + while (count > 0) { + /* If this is the first data of a packet, allocate a buffer */ + if (!nu->rx_skb) { + nu->rx_packet_len = -1; + nu->rx_skb = nci_skb_alloc(nu->ndev, + NCI_MAX_PACKET_SIZE, + GFP_KERNEL); + if (!nu->rx_skb) + return -ENOMEM; + } + + /* Eat byte after byte till full packet header is received */ + if (nu->rx_skb->len < NCI_CTRL_HDR_SIZE) { + *skb_put(nu->rx_skb, 1) = *data++; + --count; + continue; + } + + /* Header was received but packet len was not read */ + if (nu->rx_packet_len < 0) + nu->rx_packet_len = NCI_CTRL_HDR_SIZE + + nci_plen(nu->rx_skb->data); + + /* Compute how many bytes are missing and how many bytes can + * be consumed. 
+ */ + chunk_len = nu->rx_packet_len - nu->rx_skb->len; + if (count < chunk_len) + chunk_len = count; + memcpy(skb_put(nu->rx_skb, chunk_len), data, chunk_len); + data += chunk_len; + count -= chunk_len; + + /* Chcek if packet is fully received */ + if (nu->rx_packet_len == nu->rx_skb->len) { + /* Pass RX packet to driver */ + if (nu->ops.recv(nu, nu->rx_skb) != 0) + nfc_err(nu->tty->dev, "corrupted RX packet\n"); + /* Next packet will be a new one */ + nu->rx_skb = NULL; + } + } + + return 0; +} + +/* -- Default recv handler -- */ +static int nci_uart_default_recv(struct nci_uart *nu, struct sk_buff *skb) +{ + return nci_recv_frame(nu->ndev, skb); +} + +int nci_uart_register(struct nci_uart *nu) +{ + if (!nu || !nu->ops.open || + !nu->ops.recv || !nu->ops.close) + return -EINVAL; + + /* Set the send callback */ + nu->ops.send = nci_uart_send; + + /* Install default handlers if not overridden */ + if (!nu->ops.recv_buf) + nu->ops.recv_buf = nci_uart_default_recv_buf; + if (!nu->ops.recv) + nu->ops.recv = nci_uart_default_recv; + + /* Add this driver in the driver list */ + if (!nci_uart_drivers[nu->driver]) { + pr_err("driver %d is already registered\n", nu->driver); + return -EBUSY; + } + nci_uart_drivers[nu->driver] = nu; + + pr_info("NCI uart driver '%s [%d]' registered\n", nu->name, nu->driver); + + return 0; +} +EXPORT_SYMBOL_GPL(nci_uart_register); + +void nci_uart_unregister(struct nci_uart *nu) +{ + pr_info("NCI uart driver '%s [%d]' unregistered\n", nu->name, + nu->driver); + + /* Remove this driver from the driver list */ + nci_uart_drivers[nu->driver] = NULL; +} +EXPORT_SYMBOL_GPL(nci_uart_unregister); + +void nci_uart_set_config(struct nci_uart *nu, int baudrate, int flow_ctrl) +{ + struct ktermios new_termios; + + if (!nu->tty) + return; + + down_read(&nu->tty->termios_rwsem); + new_termios = nu->tty->termios; + up_read(&nu->tty->termios_rwsem); + tty_termios_encode_baud_rate(&new_termios, baudrate, baudrate); + + if (flow_ctrl) + new_termios.c_cflag |= CRTSCTS; + else + new_termios.c_cflag &= ~CRTSCTS; + + tty_set_termios(nu->tty, &new_termios); +} +EXPORT_SYMBOL_GPL(nci_uart_set_config); + +static struct tty_ldisc_ops nci_uart_ldisc = { + .magic = TTY_LDISC_MAGIC, + .owner = THIS_MODULE, + .name = "n_nci", + .open = nci_uart_tty_open, + .close = nci_uart_tty_close, + .read = nci_uart_tty_read, + .write = nci_uart_tty_write, + .poll = nci_uart_tty_poll, + .receive_buf = nci_uart_tty_receive, + .write_wakeup = nci_uart_tty_wakeup, + .ioctl = nci_uart_tty_ioctl, +}; + +static int __init nci_uart_init(void) +{ + memset(nci_uart_drivers, 0, sizeof(nci_uart_drivers)); + return tty_register_ldisc(N_NCI, &nci_uart_ldisc); +} + +static void __exit nci_uart_exit(void) +{ + tty_unregister_ldisc(N_NCI); +} + +module_init(nci_uart_init); +module_exit(nci_uart_exit); + +MODULE_AUTHOR("Marvell International Ltd."); +MODULE_DESCRIPTION("NFC NCI UART driver"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS_LDISC(N_NCI); -- cgit v1.2.3 From ca0f6a5cd99e0c6ba4bb78dc402817f636370f26 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Sat, 13 Jun 2015 19:45:33 +0200 Subject: netfilter: ipset: Fix coding styles reported by checkpatch.pl Signed-off-by: Jozsef Kadlecsik --- include/linux/netfilter/ipset/ip_set.h | 5 +- include/uapi/linux/netfilter/ipset/ip_set.h | 6 +- net/netfilter/ipset/ip_set_bitmap_gen.h | 11 +- net/netfilter/ipset/ip_set_bitmap_ip.c | 12 +- net/netfilter/ipset/ip_set_bitmap_ipmac.c | 21 +-- net/netfilter/ipset/ip_set_bitmap_port.c | 7 +- net/netfilter/ipset/ip_set_core.c | 201 
+++++++++++++-------------- net/netfilter/ipset/ip_set_getport.c | 13 +- net/netfilter/ipset/ip_set_hash_gen.h | 55 ++++---- net/netfilter/ipset/ip_set_hash_ip.c | 4 +- net/netfilter/ipset/ip_set_hash_ipmark.c | 9 +- net/netfilter/ipset/ip_set_hash_ipport.c | 14 +- net/netfilter/ipset/ip_set_hash_ipportip.c | 16 ++- net/netfilter/ipset/ip_set_hash_ipportnet.c | 19 ++- net/netfilter/ipset/ip_set_hash_mac.c | 6 +- net/netfilter/ipset/ip_set_hash_net.c | 8 +- net/netfilter/ipset/ip_set_hash_netiface.c | 25 ++-- net/netfilter/ipset/ip_set_hash_netnet.c | 46 +++--- net/netfilter/ipset/ip_set_hash_netport.c | 19 ++- net/netfilter/ipset/ip_set_hash_netportnet.c | 54 +++---- net/netfilter/ipset/ip_set_list_set.c | 11 +- net/netfilter/ipset/pfxlen.c | 16 +-- net/netfilter/xt_set.c | 44 +++--- 23 files changed, 327 insertions(+), 295 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index 19b4969a25fe..48bb01edcf30 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -349,12 +349,11 @@ ip_set_put_skbinfo(struct sk_buff *skb, struct ip_set_skbinfo *skbinfo) cpu_to_be64((u64)skbinfo->skbmark << 32 | skbinfo->skbmarkmask))) || (skbinfo->skbprio && - nla_put_net32(skb, IPSET_ATTR_SKBPRIO, + nla_put_net32(skb, IPSET_ATTR_SKBPRIO, cpu_to_be32(skbinfo->skbprio))) || (skbinfo->skbqueue && - nla_put_net16(skb, IPSET_ATTR_SKBQUEUE, + nla_put_net16(skb, IPSET_ATTR_SKBQUEUE, cpu_to_be16(skbinfo->skbqueue))); - } static inline void diff --git a/include/uapi/linux/netfilter/ipset/ip_set.h b/include/uapi/linux/netfilter/ipset/ip_set.h index 5ab4e60894cf..63b2e34f1b60 100644 --- a/include/uapi/linux/netfilter/ipset/ip_set.h +++ b/include/uapi/linux/netfilter/ipset/ip_set.h @@ -15,12 +15,12 @@ /* The protocol version */ #define IPSET_PROTOCOL 6 -/* The maximum permissible comment length we will accept over netlink */ -#define IPSET_MAX_COMMENT_SIZE 255 - /* The max length of strings including NUL: set and type identifiers */ #define IPSET_MAXNAMELEN 32 +/* The maximum permissible comment length we will accept over netlink */ +#define IPSET_MAX_COMMENT_SIZE 255 + /* Message types and commands */ enum ipset_cmd { IPSET_CMD_NONE, diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h index 86429f369128..d05e759ed0fa 100644 --- a/net/netfilter/ipset/ip_set_bitmap_gen.h +++ b/net/netfilter/ipset/ip_set_bitmap_gen.h @@ -41,7 +41,7 @@ mtype_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set)) struct mtype *map = set->data; init_timer(&map->gc); - map->gc.data = (unsigned long) set; + map->gc.data = (unsigned long)set; map->gc.function = gc; map->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ; add_timer(&map->gc); @@ -223,7 +223,7 @@ mtype_list(const struct ip_set *set, if (!test_bit(id, map->members) || (SET_WITH_TIMEOUT(set) && #ifdef IP_SET_BITMAP_STORED_TIMEOUT - mtype_is_filled((const struct mtype_elem *) x) && + mtype_is_filled((const struct mtype_elem *)x) && #endif ip_set_timeout_expired(ext_timeout(x, set)))) continue; @@ -240,7 +240,7 @@ mtype_list(const struct ip_set *set, if (mtype_do_list(skb, map, id, set->dsize)) goto nla_put_failure; if (ip_set_put_extensions(skb, set, x, - mtype_is_filled((const struct mtype_elem *) x))) + mtype_is_filled((const struct mtype_elem *)x))) goto nla_put_failure; ipset_nest_end(skb, nested); } @@ -266,13 +266,14 @@ out: static void mtype_gc(unsigned long ul_set) { - struct ip_set *set 
= (struct ip_set *) ul_set; + struct ip_set *set = (struct ip_set *)ul_set; struct mtype *map = set->data; void *x; u32 id; /* We run parallel with other readers (test element) - * but adding/deleting new entries is locked out */ + * but adding/deleting new entries is locked out + */ spin_lock_bh(&set->lock); for (id = 0; id < map->elements; id++) if (mtype_gc_test(id, map, set->dsize)) { diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c index b8ce474c038d..64a564334418 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ip.c +++ b/net/netfilter/ipset/ip_set_bitmap_ip.c @@ -59,7 +59,7 @@ struct bitmap_ip_adt_elem { static inline u32 ip_to_id(const struct bitmap_ip *m, u32 ip) { - return ((ip & ip_set_hostmask(m->netmask)) - m->first_ip)/m->hosts; + return ((ip & ip_set_hostmask(m->netmask)) - m->first_ip) / m->hosts; } /* Common functions */ @@ -175,8 +175,9 @@ bitmap_ip_uadt(struct ip_set *set, struct nlattr *tb[], if (!cidr || cidr > HOST_MASK) return -IPSET_ERR_INVALID_CIDR; ip_set_mask_from_to(ip, ip_to, cidr); - } else + } else { ip_to = ip; + } if (ip_to > map->last_ip) return -IPSET_ERR_BITMAP_RANGE; @@ -187,8 +188,8 @@ bitmap_ip_uadt(struct ip_set *set, struct nlattr *tb[], if (ret && !ip_set_eexist(ret, flags)) return ret; - else - ret = 0; + + ret = 0; } return ret; } @@ -278,8 +279,9 @@ bitmap_ip_create(struct net *net, struct ip_set *set, struct nlattr *tb[], if (cidr >= HOST_MASK) return -IPSET_ERR_INVALID_CIDR; ip_set_mask_from_to(first_ip, last_ip, cidr); - } else + } else { return -IPSET_ERR_PROTOCOL; + } if (tb[IPSET_ATTR_NETMASK]) { netmask = nla_get_u8(tb[IPSET_ATTR_NETMASK]); diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c index fe00e87decc8..1430535118fb 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c +++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c @@ -90,7 +90,7 @@ bitmap_ipmac_do_test(const struct bitmap_ipmac_adt_elem *e, return 0; elem = get_elem(map->extensions, e->id, dsize); if (elem->filled == MAC_FILLED) - return e->ether == NULL || + return !e->ether || ether_addr_equal(e->ether, elem->ether); /* Trigger kernel to fill out the ethernet address */ return -EAGAIN; @@ -131,7 +131,8 @@ bitmap_ipmac_add_timeout(unsigned long *timeout, /* If MAC is unset yet, we store plain timeout value * because the timer is not activated yet * and we can reuse it later when MAC is filled out, - * possibly by the kernel */ + * possibly by the kernel + */ if (e->ether) ip_set_timeout_set(timeout, t); else @@ -155,7 +156,7 @@ bitmap_ipmac_do_add(const struct bitmap_ipmac_adt_elem *e, /* memcpy isn't atomic */ clear_bit(e->id, map->members); smp_mb__after_atomic(); - memcpy(elem->ether, e->ether, ETH_ALEN); + ether_addr_copy(elem->ether, e->ether); } return IPSET_ADD_FAILED; } else if (!e->ether) @@ -164,19 +165,18 @@ bitmap_ipmac_do_add(const struct bitmap_ipmac_adt_elem *e, /* Fill the MAC address and trigger the timer activation */ clear_bit(e->id, map->members); smp_mb__after_atomic(); - memcpy(elem->ether, e->ether, ETH_ALEN); + ether_addr_copy(elem->ether, e->ether); elem->filled = MAC_FILLED; return IPSET_ADD_START_STORED_TIMEOUT; } else if (e->ether) { /* We can store MAC too */ - memcpy(elem->ether, e->ether, ETH_ALEN); + ether_addr_copy(elem->ether, e->ether); elem->filled = MAC_FILLED; return 0; - } else { - elem->filled = MAC_UNSET; - /* MAC is not stored yet, don't start timer */ - return IPSET_ADD_STORE_PLAIN_TIMEOUT; } + elem->filled = MAC_UNSET; + /* MAC is not stored yet, 
don't start timer */ + return IPSET_ADD_STORE_PLAIN_TIMEOUT; } static inline int @@ -352,8 +352,9 @@ bitmap_ipmac_create(struct net *net, struct ip_set *set, struct nlattr *tb[], if (cidr >= HOST_MASK) return -IPSET_ERR_INVALID_CIDR; ip_set_mask_from_to(first_ip, last_ip, cidr); - } else + } else { return -IPSET_ERR_PROTOCOL; + } elements = (u64)last_ip - first_ip + 1; diff --git a/net/netfilter/ipset/ip_set_bitmap_port.c b/net/netfilter/ipset/ip_set_bitmap_port.c index 2d360f951d18..5338ccd5da46 100644 --- a/net/netfilter/ipset/ip_set_bitmap_port.c +++ b/net/netfilter/ipset/ip_set_bitmap_port.c @@ -162,8 +162,9 @@ bitmap_port_uadt(struct ip_set *set, struct nlattr *tb[], if (port < map->first_port) return -IPSET_ERR_BITMAP_RANGE; } - } else + } else { port_to = port; + } if (port_to > map->last_port) return -IPSET_ERR_BITMAP_RANGE; @@ -174,8 +175,8 @@ bitmap_port_uadt(struct ip_set *set, struct nlattr *tb[], if (ret && !ip_set_eexist(ret, flags)) return ret; - else - ret = 0; + + ret = 0; } return ret; } diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index 2b21a1983a98..338b4047776f 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -35,6 +35,7 @@ struct ip_set_net { bool is_deleted; /* deleted by ip_set_net_exit */ bool is_destroyed; /* all sets are destroyed */ }; + static int ip_set_net_id __read_mostly; static inline struct ip_set_net *ip_set_pernet(struct net *net) @@ -60,8 +61,7 @@ MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_IPSET); #define ip_set(inst, id) \ ip_set_dereference((inst)->ip_set_list)[id] -/* - * The set types are implemented in modules and registered set types +/* The set types are implemented in modules and registered set types * can be found in ip_set_type_list. Adding/deleting types is * serialized by ip_set_type_mutex. */ @@ -131,7 +131,8 @@ __find_set_type_get(const char *name, u8 family, u8 revision, goto unlock; } /* Make sure the type is already loaded - * but we don't support the revision */ + * but we don't support the revision + */ list_for_each_entry_rcu(type, &ip_set_type_list, list) if (STRNCMP(type->name, name)) { err = -IPSET_ERR_FIND_TYPE; @@ -290,7 +291,7 @@ static const struct nla_policy ipaddr_policy[IPSET_ATTR_IPADDR_MAX + 1] = { int ip_set_get_ipaddr4(struct nlattr *nla, __be32 *ipaddr) { - struct nlattr *tb[IPSET_ATTR_IPADDR_MAX+1]; + struct nlattr *tb[IPSET_ATTR_IPADDR_MAX + 1]; if (unlikely(!flag_nested(nla))) return -IPSET_ERR_PROTOCOL; @@ -307,7 +308,7 @@ EXPORT_SYMBOL_GPL(ip_set_get_ipaddr4); int ip_set_get_ipaddr6(struct nlattr *nla, union nf_inet_addr *ipaddr) { - struct nlattr *tb[IPSET_ATTR_IPADDR_MAX+1]; + struct nlattr *tb[IPSET_ATTR_IPADDR_MAX + 1]; if (unlikely(!flag_nested(nla))) return -IPSET_ERR_PROTOCOL; @@ -318,7 +319,7 @@ ip_set_get_ipaddr6(struct nlattr *nla, union nf_inet_addr *ipaddr) return -IPSET_ERR_PROTOCOL; memcpy(ipaddr, nla_data(tb[IPSET_ATTR_IPADDR_IPV6]), - sizeof(struct in6_addr)); + sizeof(struct in6_addr)); return 0; } EXPORT_SYMBOL_GPL(ip_set_get_ipaddr6); @@ -467,8 +468,7 @@ ip_set_put_extensions(struct sk_buff *skb, const struct ip_set *set, } EXPORT_SYMBOL_GPL(ip_set_put_extensions); -/* - * Creating/destroying/renaming/swapping affect the existence and +/* Creating/destroying/renaming/swapping affect the existence and * the properties of a set. All of these can be executed from userspace * only and serialized by the nfnl mutex indirectly from nfnetlink. 
* @@ -495,8 +495,7 @@ __ip_set_put(struct ip_set *set) write_unlock_bh(&ip_set_ref_lock); } -/* - * Add, del and test set entries from kernel. +/* Add, del and test set entries from kernel. * * The set behind the index must exist and must be referenced * so it can't be destroyed (or changed) under our foot. @@ -524,7 +523,7 @@ ip_set_test(ip_set_id_t index, const struct sk_buff *skb, dev_net(par->in ? par->in : par->out), index); int ret = 0; - BUG_ON(set == NULL); + BUG_ON(!set); pr_debug("set %s, index %u\n", set->name, index); if (opt->dim < set->type->dimension || @@ -563,7 +562,7 @@ ip_set_add(ip_set_id_t index, const struct sk_buff *skb, dev_net(par->in ? par->in : par->out), index); int ret; - BUG_ON(set == NULL); + BUG_ON(!set); pr_debug("set %s, index %u\n", set->name, index); if (opt->dim < set->type->dimension || @@ -586,7 +585,7 @@ ip_set_del(ip_set_id_t index, const struct sk_buff *skb, dev_net(par->in ? par->in : par->out), index); int ret = 0; - BUG_ON(set == NULL); + BUG_ON(!set); pr_debug("set %s, index %u\n", set->name, index); if (opt->dim < set->type->dimension || @@ -601,8 +600,7 @@ ip_set_del(ip_set_id_t index, const struct sk_buff *skb, } EXPORT_SYMBOL_GPL(ip_set_del); -/* - * Find set by name, reference it once. The reference makes sure the +/* Find set by name, reference it once. The reference makes sure the * thing pointed to, does not go away under our feet. * */ @@ -616,7 +614,7 @@ ip_set_get_byname(struct net *net, const char *name, struct ip_set **set) rcu_read_lock(); for (i = 0; i < inst->ip_set_max; i++) { s = rcu_dereference(inst->ip_set_list)[i]; - if (s != NULL && STRNCMP(s->name, name)) { + if (s && STRNCMP(s->name, name)) { __ip_set_get(s); index = i; *set = s; @@ -629,8 +627,7 @@ ip_set_get_byname(struct net *net, const char *name, struct ip_set **set) } EXPORT_SYMBOL_GPL(ip_set_get_byname); -/* - * If the given set pointer points to a valid set, decrement +/* If the given set pointer points to a valid set, decrement * reference count by 1. The caller shall not assume the index * to be valid, after calling this function. * @@ -643,7 +640,7 @@ __ip_set_put_byindex(struct ip_set_net *inst, ip_set_id_t index) rcu_read_lock(); set = rcu_dereference(inst->ip_set_list)[index]; - if (set != NULL) + if (set) __ip_set_put(set); rcu_read_unlock(); } @@ -657,8 +654,7 @@ ip_set_put_byindex(struct net *net, ip_set_id_t index) } EXPORT_SYMBOL_GPL(ip_set_put_byindex); -/* - * Get the name of a set behind a set index. +/* Get the name of a set behind a set index. * We assume the set is referenced, so it does exist and * can't be destroyed. The set cannot be renamed due to * the referencing either. @@ -669,7 +665,7 @@ ip_set_name_byindex(struct net *net, ip_set_id_t index) { const struct ip_set *set = ip_set_rcu_get(net, index); - BUG_ON(set == NULL); + BUG_ON(!set); BUG_ON(set->ref == 0); /* Referenced, so it's safe */ @@ -677,13 +673,11 @@ ip_set_name_byindex(struct net *net, ip_set_id_t index) } EXPORT_SYMBOL_GPL(ip_set_name_byindex); -/* - * Routines to call by external subsystems, which do not +/* Routines to call by external subsystems, which do not * call nfnl_lock for us. */ -/* - * Find set by index, reference it once. The reference makes sure the +/* Find set by index, reference it once. The reference makes sure the * thing pointed to, does not go away under our feet. * * The nfnl mutex is used in the function. 
@@ -709,8 +703,7 @@ ip_set_nfnl_get_byindex(struct net *net, ip_set_id_t index) } EXPORT_SYMBOL_GPL(ip_set_nfnl_get_byindex); -/* - * If the given set pointer points to a valid set, decrement +/* If the given set pointer points to a valid set, decrement * reference count by 1. The caller shall not assume the index * to be valid, after calling this function. * @@ -725,15 +718,14 @@ ip_set_nfnl_put(struct net *net, ip_set_id_t index) nfnl_lock(NFNL_SUBSYS_IPSET); if (!inst->is_deleted) { /* already deleted from ip_set_net_exit() */ set = ip_set(inst, index); - if (set != NULL) + if (set) __ip_set_put(set); } nfnl_unlock(NFNL_SUBSYS_IPSET); } EXPORT_SYMBOL_GPL(ip_set_nfnl_put); -/* - * Communication protocol with userspace over netlink. +/* Communication protocol with userspace over netlink. * * The commands are serialized by the nfnl mutex. */ @@ -760,7 +752,7 @@ start_msg(struct sk_buff *skb, u32 portid, u32 seq, unsigned int flags, nlh = nlmsg_put(skb, portid, seq, cmd | (NFNL_SUBSYS_IPSET << 8), sizeof(*nfmsg), flags); - if (nlh == NULL) + if (!nlh) return NULL; nfmsg = nlmsg_data(nlh); @@ -793,7 +785,7 @@ find_set_and_id(struct ip_set_net *inst, const char *name, ip_set_id_t *id) *id = IPSET_INVALID_ID; for (i = 0; i < inst->ip_set_max; i++) { set = ip_set(inst, i); - if (set != NULL && STRNCMP(set->name, name)) { + if (set && STRNCMP(set->name, name)) { *id = i; break; } @@ -819,7 +811,7 @@ find_free_id(struct ip_set_net *inst, const char *name, ip_set_id_t *index, *index = IPSET_INVALID_ID; for (i = 0; i < inst->ip_set_max; i++) { s = ip_set(inst, i); - if (s == NULL) { + if (!s) { if (*index == IPSET_INVALID_ID) *index = i; } else if (STRNCMP(name, s->name)) { @@ -851,18 +843,18 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb, struct ip_set_net *inst = ip_set_pernet(net); struct ip_set *set, *clash = NULL; ip_set_id_t index = IPSET_INVALID_ID; - struct nlattr *tb[IPSET_ATTR_CREATE_MAX+1] = {}; + struct nlattr *tb[IPSET_ATTR_CREATE_MAX + 1] = {}; const char *name, *typename; u8 family, revision; u32 flags = flag_exist(nlh); int ret = 0; if (unlikely(protocol_failed(attr) || - attr[IPSET_ATTR_SETNAME] == NULL || - attr[IPSET_ATTR_TYPENAME] == NULL || - attr[IPSET_ATTR_REVISION] == NULL || - attr[IPSET_ATTR_FAMILY] == NULL || - (attr[IPSET_ATTR_DATA] != NULL && + !attr[IPSET_ATTR_SETNAME] || + !attr[IPSET_ATTR_TYPENAME] || + !attr[IPSET_ATTR_REVISION] || + !attr[IPSET_ATTR_FAMILY] || + (attr[IPSET_ATTR_DATA] && !flag_nested(attr[IPSET_ATTR_DATA])))) return -IPSET_ERR_PROTOCOL; @@ -873,11 +865,10 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb, pr_debug("setname: %s, typename: %s, family: %s, revision: %u\n", name, typename, family_name(family), revision); - /* - * First, and without any locks, allocate and initialize + /* First, and without any locks, allocate and initialize * a normal base set structure. */ - set = kzalloc(sizeof(struct ip_set), GFP_KERNEL); + set = kzalloc(sizeof(*set), GFP_KERNEL); if (!set) return -ENOMEM; spin_lock_init(&set->lock); @@ -885,21 +876,18 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb, set->family = family; set->revision = revision; - /* - * Next, check that we know the type, and take + /* Next, check that we know the type, and take * a reference on the type, to make sure it stays available * while constructing our new set. * * After referencing the type, we try to create the type * specific part of the set without holding any locks. 
*/ - ret = find_set_type_get(typename, family, revision, &(set->type)); + ret = find_set_type_get(typename, family, revision, &set->type); if (ret) goto out; - /* - * Without holding any locks, create private part. - */ + /* Without holding any locks, create private part. */ if (attr[IPSET_ATTR_DATA] && nla_parse_nested(tb, IPSET_ATTR_CREATE_MAX, attr[IPSET_ATTR_DATA], set->type->create_policy)) { @@ -913,8 +901,7 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb, /* BTW, ret==0 here. */ - /* - * Here, we have a valid, constructed set and we are protected + /* Here, we have a valid, constructed set and we are protected * by the nfnl mutex. Find the first free index in ip_set_list * and check clashing. */ @@ -937,7 +924,7 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb, /* Wraparound */ goto cleanup; - list = kzalloc(sizeof(struct ip_set *) * i, GFP_KERNEL); + list = kcalloc(i, sizeof(struct ip_set *), GFP_KERNEL); if (!list) goto cleanup; /* nfnl mutex is held, both lists are valid */ @@ -951,12 +938,11 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb, inst->ip_set_max = i; kfree(tmp); ret = 0; - } else if (ret) + } else if (ret) { goto cleanup; + } - /* - * Finally! Add our shiny new set to the list, and be done. - */ + /* Finally! Add our shiny new set to the list, and be done. */ pr_debug("create: '%s' created with index %u!\n", set->name, index); ip_set(inst, index) = set; @@ -1018,7 +1004,7 @@ ip_set_destroy(struct sock *ctnl, struct sk_buff *skb, if (!attr[IPSET_ATTR_SETNAME]) { for (i = 0; i < inst->ip_set_max; i++) { s = ip_set(inst, i); - if (s != NULL && s->ref) { + if (s && s->ref) { ret = -IPSET_ERR_BUSY; goto out; } @@ -1037,7 +1023,7 @@ ip_set_destroy(struct sock *ctnl, struct sk_buff *skb, } else { s = find_set_and_id(inst, nla_data(attr[IPSET_ATTR_SETNAME]), &i); - if (s == NULL) { + if (!s) { ret = -ENOENT; goto out; } else if (s->ref) { @@ -1082,12 +1068,12 @@ ip_set_flush(struct sock *ctnl, struct sk_buff *skb, if (!attr[IPSET_ATTR_SETNAME]) { for (i = 0; i < inst->ip_set_max; i++) { s = ip_set(inst, i); - if (s != NULL) + if (s) ip_set_flush_set(s); } } else { s = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME])); - if (s == NULL) + if (!s) return -ENOENT; ip_set_flush_set(s); @@ -1119,12 +1105,12 @@ ip_set_rename(struct sock *ctnl, struct sk_buff *skb, int ret = 0; if (unlikely(protocol_failed(attr) || - attr[IPSET_ATTR_SETNAME] == NULL || - attr[IPSET_ATTR_SETNAME2] == NULL)) + !attr[IPSET_ATTR_SETNAME] || + !attr[IPSET_ATTR_SETNAME2])) return -IPSET_ERR_PROTOCOL; set = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME])); - if (set == NULL) + if (!set) return -ENOENT; read_lock_bh(&ip_set_ref_lock); @@ -1136,7 +1122,7 @@ ip_set_rename(struct sock *ctnl, struct sk_buff *skb, name2 = nla_data(attr[IPSET_ATTR_SETNAME2]); for (i = 0; i < inst->ip_set_max; i++) { s = ip_set(inst, i); - if (s != NULL && STRNCMP(s->name, name2)) { + if (s && STRNCMP(s->name, name2)) { ret = -IPSET_ERR_EXIST_SETNAME2; goto out; } @@ -1168,23 +1154,24 @@ ip_set_swap(struct sock *ctnl, struct sk_buff *skb, char from_name[IPSET_MAXNAMELEN]; if (unlikely(protocol_failed(attr) || - attr[IPSET_ATTR_SETNAME] == NULL || - attr[IPSET_ATTR_SETNAME2] == NULL)) + !attr[IPSET_ATTR_SETNAME] || + !attr[IPSET_ATTR_SETNAME2])) return -IPSET_ERR_PROTOCOL; from = find_set_and_id(inst, nla_data(attr[IPSET_ATTR_SETNAME]), &from_id); - if (from == NULL) + if (!from) return -ENOENT; to = find_set_and_id(inst, nla_data(attr[IPSET_ATTR_SETNAME2]), &to_id); - if (to == NULL) + if (!to) return 
-IPSET_ERR_EXIST_SETNAME2; /* Features must not change. - * Not an artificial restriction anymore, as we must prevent - * possible loops created by swapping in setlist type of sets. */ + * Not an artifical restriction anymore, as we must prevent + * possible loops created by swapping in setlist type of sets. + */ if (!(from->type->features == to->type->features && from->family == to->family)) return -IPSET_ERR_TYPE_MISMATCH; @@ -1246,7 +1233,7 @@ dump_init(struct netlink_callback *cb, struct ip_set_net *inst) { struct nlmsghdr *nlh = nlmsg_hdr(cb->skb); int min_len = nlmsg_total_size(sizeof(struct nfgenmsg)); - struct nlattr *cda[IPSET_ATTR_CMD_MAX+1]; + struct nlattr *cda[IPSET_ATTR_CMD_MAX + 1]; struct nlattr *attr = (void *)nlh + min_len; u32 dump_type; ip_set_id_t index; @@ -1260,16 +1247,18 @@ dump_init(struct netlink_callback *cb, struct ip_set_net *inst) set = find_set_and_id(inst, nla_data(cda[IPSET_ATTR_SETNAME]), &index); - if (set == NULL) + if (!set) return -ENOENT; dump_type = DUMP_ONE; cb->args[IPSET_CB_INDEX] = index; - } else + } else { dump_type = DUMP_ALL; + } if (cda[IPSET_ATTR_FLAGS]) { u32 f = ip_set_get_h32(cda[IPSET_ATTR_FLAGS]); + dump_type |= (f << 16); } cb->args[IPSET_CB_NET] = (unsigned long)inst; @@ -1295,7 +1284,8 @@ ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb) if (ret < 0) { nlh = nlmsg_hdr(cb->skb); /* We have to create and send the error message - * manually :-( */ + * manually :-( + */ if (nlh->nlmsg_flags & NLM_F_ACK) netlink_ack(cb->skb, nlh, ret); return ret; @@ -1313,7 +1303,7 @@ dump_last: pr_debug("dump type, flag: %u %u index: %ld\n", dump_type, dump_flags, cb->args[IPSET_CB_INDEX]); for (; cb->args[IPSET_CB_INDEX] < max; cb->args[IPSET_CB_INDEX]++) { - index = (ip_set_id_t) cb->args[IPSET_CB_INDEX]; + index = (ip_set_id_t)cb->args[IPSET_CB_INDEX]; write_lock_bh(&ip_set_ref_lock); set = ip_set(inst, index); is_destroyed = inst->is_destroyed; @@ -1480,12 +1470,12 @@ call_ad(struct sock *ctnl, struct sk_buff *skb, struct ip_set *set, size_t payload = min(SIZE_MAX, sizeof(*errmsg) + nlmsg_len(nlh)); int min_len = nlmsg_total_size(sizeof(struct nfgenmsg)); - struct nlattr *cda[IPSET_ATTR_CMD_MAX+1]; + struct nlattr *cda[IPSET_ATTR_CMD_MAX + 1]; struct nlattr *cmdattr; u32 *errline; skb2 = nlmsg_new(payload, GFP_KERNEL); - if (skb2 == NULL) + if (!skb2) return -ENOMEM; rep = __nlmsg_put(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, NLMSG_ERROR, payload, 0); @@ -1502,7 +1492,8 @@ call_ad(struct sock *ctnl, struct sk_buff *skb, struct ip_set *set, *errline = lineno; - netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT); + netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, + MSG_DONTWAIT); /* Signal netlink not to send its ACK/errmsg. 
*/ return -EINTR; } @@ -1517,25 +1508,25 @@ ip_set_uadd(struct sock *ctnl, struct sk_buff *skb, { struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl)); struct ip_set *set; - struct nlattr *tb[IPSET_ATTR_ADT_MAX+1] = {}; + struct nlattr *tb[IPSET_ATTR_ADT_MAX + 1] = {}; const struct nlattr *nla; u32 flags = flag_exist(nlh); bool use_lineno; int ret = 0; if (unlikely(protocol_failed(attr) || - attr[IPSET_ATTR_SETNAME] == NULL || + !attr[IPSET_ATTR_SETNAME] || !((attr[IPSET_ATTR_DATA] != NULL) ^ (attr[IPSET_ATTR_ADT] != NULL)) || - (attr[IPSET_ATTR_DATA] != NULL && + (attr[IPSET_ATTR_DATA] && !flag_nested(attr[IPSET_ATTR_DATA])) || - (attr[IPSET_ATTR_ADT] != NULL && + (attr[IPSET_ATTR_ADT] && (!flag_nested(attr[IPSET_ATTR_ADT]) || - attr[IPSET_ATTR_LINENO] == NULL)))) + !attr[IPSET_ATTR_LINENO])))) return -IPSET_ERR_PROTOCOL; set = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME])); - if (set == NULL) + if (!set) return -ENOENT; use_lineno = !!attr[IPSET_ATTR_LINENO]; @@ -1572,25 +1563,25 @@ ip_set_udel(struct sock *ctnl, struct sk_buff *skb, { struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl)); struct ip_set *set; - struct nlattr *tb[IPSET_ATTR_ADT_MAX+1] = {}; + struct nlattr *tb[IPSET_ATTR_ADT_MAX + 1] = {}; const struct nlattr *nla; u32 flags = flag_exist(nlh); bool use_lineno; int ret = 0; if (unlikely(protocol_failed(attr) || - attr[IPSET_ATTR_SETNAME] == NULL || + !attr[IPSET_ATTR_SETNAME] || !((attr[IPSET_ATTR_DATA] != NULL) ^ (attr[IPSET_ATTR_ADT] != NULL)) || - (attr[IPSET_ATTR_DATA] != NULL && + (attr[IPSET_ATTR_DATA] && !flag_nested(attr[IPSET_ATTR_DATA])) || - (attr[IPSET_ATTR_ADT] != NULL && + (attr[IPSET_ATTR_ADT] && (!flag_nested(attr[IPSET_ATTR_ADT]) || - attr[IPSET_ATTR_LINENO] == NULL)))) + !attr[IPSET_ATTR_LINENO])))) return -IPSET_ERR_PROTOCOL; set = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME])); - if (set == NULL) + if (!set) return -ENOENT; use_lineno = !!attr[IPSET_ATTR_LINENO]; @@ -1627,17 +1618,17 @@ ip_set_utest(struct sock *ctnl, struct sk_buff *skb, { struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl)); struct ip_set *set; - struct nlattr *tb[IPSET_ATTR_ADT_MAX+1] = {}; + struct nlattr *tb[IPSET_ATTR_ADT_MAX + 1] = {}; int ret = 0; if (unlikely(protocol_failed(attr) || - attr[IPSET_ATTR_SETNAME] == NULL || - attr[IPSET_ATTR_DATA] == NULL || + !attr[IPSET_ATTR_SETNAME] || + !attr[IPSET_ATTR_DATA] || !flag_nested(attr[IPSET_ATTR_DATA]))) return -IPSET_ERR_PROTOCOL; set = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME])); - if (set == NULL) + if (!set) return -ENOENT; if (nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, attr[IPSET_ATTR_DATA], @@ -1668,15 +1659,15 @@ ip_set_header(struct sock *ctnl, struct sk_buff *skb, int ret = 0; if (unlikely(protocol_failed(attr) || - attr[IPSET_ATTR_SETNAME] == NULL)) + !attr[IPSET_ATTR_SETNAME])) return -IPSET_ERR_PROTOCOL; set = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME])); - if (set == NULL) + if (!set) return -ENOENT; skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); - if (skb2 == NULL) + if (!skb2) return -ENOMEM; nlh2 = start_msg(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0, @@ -1725,8 +1716,8 @@ ip_set_type(struct sock *ctnl, struct sk_buff *skb, int ret = 0; if (unlikely(protocol_failed(attr) || - attr[IPSET_ATTR_TYPENAME] == NULL || - attr[IPSET_ATTR_FAMILY] == NULL)) + !attr[IPSET_ATTR_TYPENAME] || + !attr[IPSET_ATTR_FAMILY])) return -IPSET_ERR_PROTOCOL; family = nla_get_u8(attr[IPSET_ATTR_FAMILY]); @@ -1736,7 +1727,7 @@ ip_set_type(struct sock *ctnl, struct sk_buff *skb, return ret; skb2 = 
nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); - if (skb2 == NULL) + if (!skb2) return -ENOMEM; nlh2 = start_msg(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0, @@ -1781,11 +1772,11 @@ ip_set_protocol(struct sock *ctnl, struct sk_buff *skb, struct nlmsghdr *nlh2; int ret = 0; - if (unlikely(attr[IPSET_ATTR_PROTOCOL] == NULL)) + if (unlikely(!attr[IPSET_ATTR_PROTOCOL])) return -IPSET_ERR_PROTOCOL; skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); - if (skb2 == NULL) + if (!skb2) return -ENOMEM; nlh2 = start_msg(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0, @@ -1913,7 +1904,7 @@ ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len) ret = -EFAULT; goto done; } - op = (unsigned int *) data; + op = (unsigned int *)data; if (*op < IP_SET_OP_VERSION) { /* Check the version at the beginning of operations */ @@ -2025,7 +2016,7 @@ ip_set_net_init(struct net *net) if (inst->ip_set_max >= IPSET_INVALID_ID) inst->ip_set_max = IPSET_INVALID_ID - 1; - list = kzalloc(sizeof(struct ip_set *) * inst->ip_set_max, GFP_KERNEL); + list = kcalloc(inst->ip_set_max, sizeof(struct ip_set *), GFP_KERNEL); if (!list) return -ENOMEM; inst->is_deleted = false; @@ -2061,11 +2052,11 @@ static struct pernet_operations ip_set_net_ops = { .size = sizeof(struct ip_set_net) }; - static int __init ip_set_init(void) { int ret = nfnetlink_subsys_register(&ip_set_netlink_subsys); + if (ret != 0) { pr_err("ip_set: cannot register with nfnetlink.\n"); return ret; diff --git a/net/netfilter/ipset/ip_set_getport.c b/net/netfilter/ipset/ip_set_getport.c index 1981f021cc60..42c3e3ba1b94 100644 --- a/net/netfilter/ipset/ip_set_getport.c +++ b/net/netfilter/ipset/ip_set_getport.c @@ -30,7 +30,7 @@ get_port(const struct sk_buff *skb, int protocol, unsigned int protooff, const struct tcphdr *th; th = skb_header_pointer(skb, protooff, sizeof(_tcph), &_tcph); - if (th == NULL) + if (!th) /* No choice either */ return false; @@ -42,7 +42,7 @@ get_port(const struct sk_buff *skb, int protocol, unsigned int protooff, const sctp_sctphdr_t *sh; sh = skb_header_pointer(skb, protooff, sizeof(_sh), &_sh); - if (sh == NULL) + if (!sh) /* No choice either */ return false; @@ -55,7 +55,7 @@ get_port(const struct sk_buff *skb, int protocol, unsigned int protooff, const struct udphdr *uh; uh = skb_header_pointer(skb, protooff, sizeof(_udph), &_udph); - if (uh == NULL) + if (!uh) /* No choice either */ return false; @@ -67,7 +67,7 @@ get_port(const struct sk_buff *skb, int protocol, unsigned int protooff, const struct icmphdr *ic; ic = skb_header_pointer(skb, protooff, sizeof(_ich), &_ich); - if (ic == NULL) + if (!ic) return false; *port = (__force __be16)htons((ic->type << 8) | ic->code); @@ -78,7 +78,7 @@ get_port(const struct sk_buff *skb, int protocol, unsigned int protooff, const struct icmp6hdr *ic; ic = skb_header_pointer(skb, protooff, sizeof(_ich), &_ich); - if (ic == NULL) + if (!ic) return false; *port = (__force __be16) @@ -116,7 +116,8 @@ ip_set_get_ip4_port(const struct sk_buff *skb, bool src, return false; default: /* Other protocols doesn't have ports, - so we can match fragments */ + * so we can match fragments. 
+ */ *proto = protocol; return true; } diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index f352cc022010..afe905c208af 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -35,7 +35,7 @@ /* Number of elements to store in an initial array block */ #define AHASH_INIT_SIZE 4 /* Max number of elements to store in an array block */ -#define AHASH_MAX_SIZE (3*AHASH_INIT_SIZE) +#define AHASH_MAX_SIZE (3 * AHASH_INIT_SIZE) /* Max muber of elements in the array block when tuned */ #define AHASH_MAX_TUNED 64 @@ -57,6 +57,7 @@ tune_ahash_max(u8 curr, u32 multi) */ return n > curr && n <= AHASH_MAX_TUNED ? n : curr; } + #define TUNE_AHASH_MAX(h, multi) \ ((h)->ahash_max = tune_ahash_max((h)->ahash_max, multi)) #else @@ -256,7 +257,7 @@ htable_bits(u32 hashsize) #endif #define HKEY(data, initval, htable_bits) \ -(jhash2((u32 *)(data), HKEY_DATALEN/sizeof(u32), initval) \ +(jhash2((u32 *)(data), HKEY_DATALEN / sizeof(u32), initval) \ & jhash_mask(htable_bits)) #ifndef htype @@ -299,11 +300,11 @@ mtype_add_cidr(struct htype *h, u8 cidr, u8 nets_length, u8 n) /* Add in increasing prefix order, so larger cidr first */ for (i = 0, j = -1; i < nets_length && h->nets[i].cidr[n]; i++) { - if (j != -1) + if (j != -1) { continue; - else if (h->nets[i].cidr[n] < cidr) + } else if (h->nets[i].cidr[n] < cidr) { j = i; - else if (h->nets[i].cidr[n] == cidr) { + } else if (h->nets[i].cidr[n] == cidr) { h->nets[cidr - 1].nets[n]++; return; } @@ -322,15 +323,15 @@ mtype_del_cidr(struct htype *h, u8 cidr, u8 nets_length, u8 n) u8 i, j, net_end = nets_length - 1; for (i = 0; i < nets_length; i++) { - if (h->nets[i].cidr[n] != cidr) - continue; + if (h->nets[i].cidr[n] != cidr) + continue; h->nets[cidr - 1].nets[n]--; if (h->nets[cidr - 1].nets[n] > 0) - return; + return; for (j = i; j < net_end && h->nets[j].cidr[n]; j++) - h->nets[j].cidr[n] = h->nets[j + 1].cidr[n]; + h->nets[j].cidr[n] = h->nets[j + 1].cidr[n]; h->nets[j].cidr[n] = 0; - return; + return; } } #endif @@ -426,8 +427,8 @@ mtype_destroy(struct ip_set *set) if (SET_WITH_TIMEOUT(set)) del_timer_sync(&h->gc); - mtype_ahash_destroy(set, __ipset_dereference_protected(h->table, 1), - true); + mtype_ahash_destroy(set, + __ipset_dereference_protected(h->table, 1), true); kfree(h); set->data = NULL; @@ -439,7 +440,7 @@ mtype_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set)) struct htype *h = set->data; init_timer(&h->gc); - h->gc.data = (unsigned long) set; + h->gc.data = (unsigned long)set; h->gc.function = gc; h->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ; add_timer(&h->gc); @@ -530,7 +531,7 @@ mtype_expire(struct ip_set *set, struct htype *h, u8 nets_length, size_t dsize) static void mtype_gc(unsigned long ul_set) { - struct ip_set *set = (struct ip_set *) ul_set; + struct ip_set *set = (struct ip_set *)ul_set; struct htype *h = set->data; pr_debug("called\n"); @@ -544,7 +545,8 @@ mtype_gc(unsigned long ul_set) /* Resize a hash: create a new hash table with doubling the hashsize * and inserting the elements to it. Repeat until we succeed or - * fail due to memory pressures. */ + * fail due to memory pressures. + */ static int mtype_resize(struct ip_set *set, bool retried) { @@ -687,7 +689,8 @@ cleanup: } /* Add an element to a hash and update the internal counters when succeeded, - * otherwise report the proper error code. */ + * otherwise report the proper error code. 
+ */ static int mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext, struct ip_set_ext *mext, u32 flags) @@ -926,7 +929,8 @@ mtype_data_match(struct mtype_elem *data, const struct ip_set_ext *ext, #ifdef IP_SET_HASH_WITH_NETS /* Special test function which takes into account the different network - * sizes added to the set */ + * sizes added to the set + */ static int mtype_test_cidrs(struct ip_set *set, struct mtype_elem *d, const struct ip_set_ext *ext, @@ -1004,7 +1008,8 @@ mtype_test(struct ip_set *set, void *value, const struct ip_set_ext *ext, t = rcu_dereference_bh(h->table); #ifdef IP_SET_HASH_WITH_NETS /* If we test an IP address and not a network address, - * try all possible network sizes */ + * try all possible network sizes + */ for (i = 0; i < IPSET_NET_COUNT; i++) if (DCIDR_GET(d->cidr, i) != SET_HOST_MASK(set->family)) break; @@ -1148,8 +1153,8 @@ mtype_list(const struct ip_set *set, nla_nest_cancel(skb, atd); ret = -EMSGSIZE; goto out; - } else - goto nla_put_failure; + } + goto nla_put_failure; } if (mtype_data_list(skb, e)) goto nla_put_failure; @@ -1171,8 +1176,9 @@ nla_put_failure: set->name); cb->args[IPSET_CB_ARG0] = 0; ret = -EMSGSIZE; - } else + } else { ipset_nest_end(skb, atd); + } out: rcu_read_unlock(); return ret; @@ -1180,12 +1186,13 @@ out: static int IPSET_TOKEN(MTYPE, _kadt)(struct ip_set *set, const struct sk_buff *skb, - const struct xt_action_param *par, - enum ipset_adt adt, struct ip_set_adt_opt *opt); + const struct xt_action_param *par, + enum ipset_adt adt, struct ip_set_adt_opt *opt); static int IPSET_TOKEN(MTYPE, _uadt)(struct ip_set *set, struct nlattr *tb[], - enum ipset_adt adt, u32 *lineno, u32 flags, bool retried); + enum ipset_adt adt, u32 *lineno, u32 flags, + bool retried); static const struct ip_set_type_variant mtype_variant = { .kadt = mtype_kadt, diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c index f54d7069d633..9d6bf19f7b78 100644 --- a/net/netfilter/ipset/ip_set_hash_ip.c +++ b/net/netfilter/ipset/ip_set_hash_ip.c @@ -158,8 +158,8 @@ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[], if (ret && !ip_set_eexist(ret, flags)) return ret; - else - ret = 0; + + ret = 0; } return ret; } diff --git a/net/netfilter/ipset/ip_set_hash_ipmark.c b/net/netfilter/ipset/ip_set_hash_ipmark.c index f8fbc325ad34..a0695a2ab585 100644 --- a/net/netfilter/ipset/ip_set_hash_ipmark.c +++ b/net/netfilter/ipset/ip_set_hash_ipmark.c @@ -155,8 +155,8 @@ hash_ipmark4_uadt(struct ip_set *set, struct nlattr *tb[], if (ret && !ip_set_eexist(ret, flags)) return ret; - else - ret = 0; + + ret = 0; } return ret; } @@ -206,7 +206,6 @@ hash_ipmark6_data_next(struct hash_ipmark4_elem *next, #define IP_SET_EMIT_CREATE #include "ip_set_hash_gen.h" - static int hash_ipmark6_kadt(struct ip_set *set, const struct sk_buff *skb, const struct xt_action_param *par, @@ -268,10 +267,8 @@ hash_ipmark6_uadt(struct ip_set *set, struct nlattr *tb[], ret = adtfn(set, &e, &ext, &ext, flags); if (ret && !ip_set_eexist(ret, flags)) return ret; - else - ret = 0; - return ret; + return 0; } static struct ip_set_type hash_ipmark_type __read_mostly = { diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c index 9a31db8ccca6..9d84b3dff603 100644 --- a/net/netfilter/ipset/ip_set_hash_ipport.c +++ b/net/netfilter/ipset/ip_set_hash_ipport.c @@ -140,8 +140,9 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[], if (e.proto == 0) return -IPSET_ERR_INVALID_PROTO; - } else + } else { 
return -IPSET_ERR_MISSING_PROTO; + } if (!(with_ports || e.proto == IPPROTO_ICMP)) e.port = 0; @@ -187,8 +188,8 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[], if (ret && !ip_set_eexist(ret, flags)) return ret; - else - ret = 0; + + ret = 0; } } return ret; @@ -305,8 +306,9 @@ hash_ipport6_uadt(struct ip_set *set, struct nlattr *tb[], if (e.proto == 0) return -IPSET_ERR_INVALID_PROTO; - } else + } else { return -IPSET_ERR_MISSING_PROTO; + } if (!(with_ports || e.proto == IPPROTO_ICMPV6)) e.port = 0; @@ -329,8 +331,8 @@ hash_ipport6_uadt(struct ip_set *set, struct nlattr *tb[], if (ret && !ip_set_eexist(ret, flags)) return ret; - else - ret = 0; + + ret = 0; } return ret; } diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c index fc42489f8795..215b7b942038 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportip.c +++ b/net/netfilter/ipset/ip_set_hash_ipportip.c @@ -63,7 +63,7 @@ hash_ipportip4_data_equal(const struct hash_ipportip4_elem *ip1, static bool hash_ipportip4_data_list(struct sk_buff *skb, - const struct hash_ipportip4_elem *data) + const struct hash_ipportip4_elem *data) { if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, data->ip) || nla_put_ipaddr4(skb, IPSET_ATTR_IP2, data->ip2) || @@ -147,8 +147,9 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[], if (e.proto == 0) return -IPSET_ERR_INVALID_PROTO; - } else + } else { return -IPSET_ERR_MISSING_PROTO; + } if (!(with_ports || e.proto == IPPROTO_ICMP)) e.port = 0; @@ -194,8 +195,8 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[], if (ret && !ip_set_eexist(ret, flags)) return ret; - else - ret = 0; + + ret = 0; } } return ret; @@ -320,8 +321,9 @@ hash_ipportip6_uadt(struct ip_set *set, struct nlattr *tb[], if (e.proto == 0) return -IPSET_ERR_INVALID_PROTO; - } else + } else { return -IPSET_ERR_MISSING_PROTO; + } if (!(with_ports || e.proto == IPPROTO_ICMPV6)) e.port = 0; @@ -344,8 +346,8 @@ hash_ipportip6_uadt(struct ip_set *set, struct nlattr *tb[], if (ret && !ip_set_eexist(ret, flags)) return ret; - else - ret = 0; + + ret = 0; } return ret; } diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c index 2a69b9bf66b8..9ca719625ea3 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportnet.c +++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c @@ -209,14 +209,16 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[], if (e.proto == 0) return -IPSET_ERR_INVALID_PROTO; - } else + } else { return -IPSET_ERR_MISSING_PROTO; + } if (!(with_ports || e.proto == IPPROTO_ICMP)) e.port = 0; if (tb[IPSET_ATTR_CADT_FLAGS]) { u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); + if (cadt_flags & IPSET_FLAG_NOMATCH) flags |= (IPSET_FLAG_NOMATCH << 16); } @@ -263,8 +265,9 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[], swap(ip2_from, ip2_to); if (ip2_from + UINT_MAX == ip2_to) return -IPSET_ERR_HASH_RANGE; - } else + } else { ip_set_mask_from_to(ip2_from, ip2_to, e.cidr + 1); + } if (retried) ip = ntohl(h->next.ip); @@ -287,8 +290,8 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[], if (ret && !ip_set_eexist(ret, flags)) return ret; - else - ret = 0; + + ret = 0; ip2 = ip2_last + 1; } } @@ -466,14 +469,16 @@ hash_ipportnet6_uadt(struct ip_set *set, struct nlattr *tb[], if (e.proto == 0) return -IPSET_ERR_INVALID_PROTO; - } else + } else { return -IPSET_ERR_MISSING_PROTO; + } if (!(with_ports || e.proto == IPPROTO_ICMPV6)) e.port = 0; if (tb[IPSET_ATTR_CADT_FLAGS]) { u32 cadt_flags 
= ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); + if (cadt_flags & IPSET_FLAG_NOMATCH) flags |= (IPSET_FLAG_NOMATCH << 16); } @@ -497,8 +502,8 @@ hash_ipportnet6_uadt(struct ip_set *set, struct nlattr *tb[], if (ret && !ip_set_eexist(ret, flags)) return ret; - else - ret = 0; + + ret = 0; } return ret; } diff --git a/net/netfilter/ipset/ip_set_hash_mac.c b/net/netfilter/ipset/ip_set_hash_mac.c index 112aff3cda96..f1e7d2c0f685 100644 --- a/net/netfilter/ipset/ip_set_hash_mac.c +++ b/net/netfilter/ipset/ip_set_hash_mac.c @@ -89,10 +89,10 @@ hash_mac4_kadt(struct ip_set *set, const struct sk_buff *skb, return 0; if (skb_mac_header(skb) < skb->head || - (skb_mac_header(skb) + ETH_HLEN) > skb->data) + (skb_mac_header(skb) + ETH_HLEN) > skb->data) return -EINVAL; - memcpy(e.ether, eth_hdr(skb)->h_source, ETH_ALEN); + ether_addr_copy(e.ether, eth_hdr(skb)->h_source); if (memcmp(e.ether, invalid_ether, ETH_ALEN) == 0) return -EINVAL; return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags); @@ -116,7 +116,7 @@ hash_mac4_uadt(struct ip_set *set, struct nlattr *tb[], ret = ip_set_get_extensions(set, tb, &ext); if (ret) return ret; - memcpy(e.ether, nla_data(tb[IPSET_ATTR_ETHER]), ETH_ALEN); + ether_addr_copy(e.ether, nla_data(tb[IPSET_ATTR_ETHER])); if (memcmp(e.ether, invalid_ether, ETH_ALEN) == 0) return -IPSET_ERR_HASH_ELEM; diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c index e49b1d010d30..3e4bffdc1cc0 100644 --- a/net/netfilter/ipset/ip_set_hash_net.c +++ b/net/netfilter/ipset/ip_set_hash_net.c @@ -169,6 +169,7 @@ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[], if (tb[IPSET_ATTR_CADT_FLAGS]) { u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); + if (cadt_flags & IPSET_FLAG_NOMATCH) flags |= (IPSET_FLAG_NOMATCH << 16); } @@ -176,7 +177,7 @@ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[], if (adt == IPSET_TEST || !tb[IPSET_ATTR_IP_TO]) { e.ip = htonl(ip & ip_set_hostmask(e.cidr)); ret = adtfn(set, &e, &ext, &ext, flags); - return ip_set_enomatch(ret, flags, adt, set) ? -ret: + return ip_set_enomatch(ret, flags, adt, set) ? -ret : ip_set_eexist(ret, flags) ? 0 : ret; } @@ -198,8 +199,8 @@ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[], ret = adtfn(set, &e, &ext, &ext, flags); if (ret && !ip_set_eexist(ret, flags)) return ret; - else - ret = 0; + + ret = 0; ip = last + 1; } return ret; @@ -339,6 +340,7 @@ hash_net6_uadt(struct ip_set *set, struct nlattr *tb[], if (tb[IPSET_ATTR_CADT_FLAGS]) { u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); + if (cadt_flags & IPSET_FLAG_NOMATCH) flags |= (IPSET_FLAG_NOMATCH << 16); } diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c index 42c893e08842..43d8c9896fa3 100644 --- a/net/netfilter/ipset/ip_set_hash_netiface.c +++ b/net/netfilter/ipset/ip_set_hash_netiface.c @@ -143,7 +143,7 @@ static const char *get_physindev_name(const struct sk_buff *skb) return dev ? dev->name : NULL; } -static const char *get_phyoutdev_name(const struct sk_buff *skb) +static const char *get_physoutdev_name(const struct sk_buff *skb) { struct net_device *dev = nf_bridge_get_physoutdev(skb); @@ -178,15 +178,16 @@ hash_netiface4_kadt(struct ip_set *set, const struct sk_buff *skb, if (opt->cmdflags & IPSET_FLAG_PHYSDEV) { #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) const char *eiface = SRCDIR ? 
get_physindev_name(skb) : - get_phyoutdev_name(skb); + get_physoutdev_name(skb); if (!eiface) return -EINVAL; STRLCPY(e.iface, eiface); e.physdev = 1; #endif - } else + } else { STRLCPY(e.iface, SRCDIR ? IFACE(in) : IFACE(out)); + } if (strlen(e.iface) == 0) return -EINVAL; @@ -229,6 +230,7 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[], if (tb[IPSET_ATTR_CADT_FLAGS]) { u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); + if (cadt_flags & IPSET_FLAG_PHYSDEV) e.physdev = 1; if (cadt_flags & IPSET_FLAG_NOMATCH) @@ -249,8 +251,9 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[], swap(ip, ip_to); if (ip + UINT_MAX == ip_to) return -IPSET_ERR_HASH_RANGE; - } else + } else { ip_set_mask_from_to(ip, ip_to, e.cidr); + } if (retried) ip = ntohl(h->next.ip); @@ -261,8 +264,8 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[], if (ret && !ip_set_eexist(ret, flags)) return ret; - else - ret = 0; + + ret = 0; ip = last + 1; } return ret; @@ -385,15 +388,16 @@ hash_netiface6_kadt(struct ip_set *set, const struct sk_buff *skb, if (opt->cmdflags & IPSET_FLAG_PHYSDEV) { #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) const char *eiface = SRCDIR ? get_physindev_name(skb) : - get_phyoutdev_name(skb); + get_physoutdev_name(skb); + if (!eiface) return -EINVAL; - STRLCPY(e.iface, eiface); e.physdev = 1; #endif - } else + } else { STRLCPY(e.iface, SRCDIR ? IFACE(in) : IFACE(out)); + } if (strlen(e.iface) == 0) return -EINVAL; @@ -403,7 +407,7 @@ hash_netiface6_kadt(struct ip_set *set, const struct sk_buff *skb, static int hash_netiface6_uadt(struct ip_set *set, struct nlattr *tb[], - enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) + enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netiface6_elem e = { .cidr = HOST_MASK, .elem = 1 }; @@ -440,6 +444,7 @@ hash_netiface6_uadt(struct ip_set *set, struct nlattr *tb[], if (tb[IPSET_ATTR_CADT_FLAGS]) { u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); + if (cadt_flags & IPSET_FLAG_PHYSDEV) e.physdev = 1; if (cadt_flags & IPSET_FLAG_NOMATCH) diff --git a/net/netfilter/ipset/ip_set_hash_netnet.c b/net/netfilter/ipset/ip_set_hash_netnet.c index b5428be1f159..3c862c0a76d1 100644 --- a/net/netfilter/ipset/ip_set_hash_netnet.c +++ b/net/netfilter/ipset/ip_set_hash_netnet.c @@ -57,8 +57,8 @@ struct hash_netnet4_elem { static inline bool hash_netnet4_data_equal(const struct hash_netnet4_elem *ip1, - const struct hash_netnet4_elem *ip2, - u32 *multi) + const struct hash_netnet4_elem *ip2, + u32 *multi) { return ip1->ipcmp == ip2->ipcmp && ip1->ccmp == ip2->ccmp; @@ -84,7 +84,7 @@ hash_netnet4_data_reset_flags(struct hash_netnet4_elem *elem, u8 *flags) static inline void hash_netnet4_data_reset_elem(struct hash_netnet4_elem *elem, - struct hash_netnet4_elem *orig) + struct hash_netnet4_elem *orig) { elem->ip[1] = orig->ip[1]; } @@ -103,7 +103,7 @@ hash_netnet4_data_netmask(struct hash_netnet4_elem *elem, u8 cidr, bool inner) static bool hash_netnet4_data_list(struct sk_buff *skb, - const struct hash_netnet4_elem *data) + const struct hash_netnet4_elem *data) { u32 flags = data->nomatch ? 
IPSET_FLAG_NOMATCH : 0; @@ -122,7 +122,7 @@ nla_put_failure: static inline void hash_netnet4_data_next(struct hash_netnet4_elem *next, - const struct hash_netnet4_elem *d) + const struct hash_netnet4_elem *d) { next->ipcmp = d->ipcmp; } @@ -133,8 +133,8 @@ hash_netnet4_data_next(struct hash_netnet4_elem *next, static int hash_netnet4_kadt(struct ip_set *set, const struct sk_buff *skb, - const struct xt_action_param *par, - enum ipset_adt adt, struct ip_set_adt_opt *opt) + const struct xt_action_param *par, + enum ipset_adt adt, struct ip_set_adt_opt *opt) { const struct hash_netnet *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; @@ -156,7 +156,7 @@ hash_netnet4_kadt(struct ip_set *set, const struct sk_buff *skb, static int hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[], - enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) + enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { const struct hash_netnet *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; @@ -199,6 +199,7 @@ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[], if (tb[IPSET_ATTR_CADT_FLAGS]) { u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); + if (cadt_flags & IPSET_FLAG_NOMATCH) flags |= (IPSET_FLAG_NOMATCH << 16); } @@ -221,8 +222,9 @@ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[], swap(ip, ip_to); if (unlikely(ip + UINT_MAX == ip_to)) return -IPSET_ERR_HASH_RANGE; - } else + } else { ip_set_mask_from_to(ip, ip_to, e.cidr[0]); + } ip2_to = ip2_from; if (tb[IPSET_ATTR_IP2_TO]) { @@ -233,8 +235,9 @@ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[], swap(ip2_from, ip2_to); if (unlikely(ip2_from + UINT_MAX == ip2_to)) return -IPSET_ERR_HASH_RANGE; - } else + } else { ip_set_mask_from_to(ip2_from, ip2_to, e.cidr[1]); + } if (retried) ip = ntohl(h->next.ip[0]); @@ -251,8 +254,8 @@ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[], ret = adtfn(set, &e, &ext, &ext, flags); if (ret && !ip_set_eexist(ret, flags)) return ret; - else - ret = 0; + + ret = 0; ip2 = last2 + 1; } ip = last + 1; @@ -276,8 +279,8 @@ struct hash_netnet6_elem { static inline bool hash_netnet6_data_equal(const struct hash_netnet6_elem *ip1, - const struct hash_netnet6_elem *ip2, - u32 *multi) + const struct hash_netnet6_elem *ip2, + u32 *multi) { return ipv6_addr_equal(&ip1->ip[0].in6, &ip2->ip[0].in6) && ipv6_addr_equal(&ip1->ip[1].in6, &ip2->ip[1].in6) && @@ -304,7 +307,7 @@ hash_netnet6_data_reset_flags(struct hash_netnet6_elem *elem, u8 *flags) static inline void hash_netnet6_data_reset_elem(struct hash_netnet6_elem *elem, - struct hash_netnet6_elem *orig) + struct hash_netnet6_elem *orig) { elem->ip[1] = orig->ip[1]; } @@ -323,7 +326,7 @@ hash_netnet6_data_netmask(struct hash_netnet6_elem *elem, u8 cidr, bool inner) static bool hash_netnet6_data_list(struct sk_buff *skb, - const struct hash_netnet6_elem *data) + const struct hash_netnet6_elem *data) { u32 flags = data->nomatch ? 
IPSET_FLAG_NOMATCH : 0; @@ -342,7 +345,7 @@ nla_put_failure: static inline void hash_netnet6_data_next(struct hash_netnet4_elem *next, - const struct hash_netnet6_elem *d) + const struct hash_netnet6_elem *d) { } @@ -356,8 +359,8 @@ hash_netnet6_data_next(struct hash_netnet4_elem *next, static int hash_netnet6_kadt(struct ip_set *set, const struct sk_buff *skb, - const struct xt_action_param *par, - enum ipset_adt adt, struct ip_set_adt_opt *opt) + const struct xt_action_param *par, + enum ipset_adt adt, struct ip_set_adt_opt *opt) { const struct hash_netnet *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; @@ -367,7 +370,7 @@ hash_netnet6_kadt(struct ip_set *set, const struct sk_buff *skb, e.cidr[0] = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK); e.cidr[1] = INIT_CIDR(h->nets[0].cidr[1], HOST_MASK); if (adt == IPSET_TEST) - e.ccmp = (HOST_MASK << (sizeof(u8)*8)) | HOST_MASK; + e.ccmp = (HOST_MASK << (sizeof(u8) * 8)) | HOST_MASK; ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip[0].in6); ip6addrptr(skb, opt->flags & IPSET_DIM_TWO_SRC, &e.ip[1].in6); @@ -379,7 +382,7 @@ hash_netnet6_kadt(struct ip_set *set, const struct sk_buff *skb, static int hash_netnet6_uadt(struct ip_set *set, struct nlattr *tb[], - enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) + enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netnet6_elem e = { .cidr = { HOST_MASK, HOST_MASK, }, }; @@ -424,6 +427,7 @@ hash_netnet6_uadt(struct ip_set *set, struct nlattr *tb[], if (tb[IPSET_ATTR_CADT_FLAGS]) { u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); + if (cadt_flags & IPSET_FLAG_NOMATCH) flags |= (IPSET_FLAG_NOMATCH << 16); } diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c index 27307d0a8a5d..731813e0f08c 100644 --- a/net/netfilter/ipset/ip_set_hash_netport.c +++ b/net/netfilter/ipset/ip_set_hash_netport.c @@ -198,8 +198,9 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[], if (e.proto == 0) return -IPSET_ERR_INVALID_PROTO; - } else + } else { return -IPSET_ERR_MISSING_PROTO; + } if (!(with_ports || e.proto == IPPROTO_ICMP)) e.port = 0; @@ -208,6 +209,7 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[], if (tb[IPSET_ATTR_CADT_FLAGS]) { u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); + if (cadt_flags & IPSET_FLAG_NOMATCH) flags |= (IPSET_FLAG_NOMATCH << 16); } @@ -233,8 +235,9 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[], swap(ip, ip_to); if (ip + UINT_MAX == ip_to) return -IPSET_ERR_HASH_RANGE; - } else + } else { ip_set_mask_from_to(ip, ip_to, e.cidr + 1); + } if (retried) ip = ntohl(h->next.ip); @@ -250,8 +253,8 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[], if (ret && !ip_set_eexist(ret, flags)) return ret; - else - ret = 0; + + ret = 0; } ip = last + 1; } @@ -413,14 +416,16 @@ hash_netport6_uadt(struct ip_set *set, struct nlattr *tb[], if (e.proto == 0) return -IPSET_ERR_INVALID_PROTO; - } else + } else { return -IPSET_ERR_MISSING_PROTO; + } if (!(with_ports || e.proto == IPPROTO_ICMPV6)) e.port = 0; if (tb[IPSET_ATTR_CADT_FLAGS]) { u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); + if (cadt_flags & IPSET_FLAG_NOMATCH) flags |= (IPSET_FLAG_NOMATCH << 16); } @@ -444,8 +449,8 @@ hash_netport6_uadt(struct ip_set *set, struct nlattr *tb[], if (ret && !ip_set_eexist(ret, flags)) return ret; - else - ret = 0; + + ret = 0; } return ret; } diff --git a/net/netfilter/ipset/ip_set_hash_netportnet.c 
b/net/netfilter/ipset/ip_set_hash_netportnet.c index 1e0e47ae40a4..0c68734f5cc4 100644 --- a/net/netfilter/ipset/ip_set_hash_netportnet.c +++ b/net/netfilter/ipset/ip_set_hash_netportnet.c @@ -62,8 +62,8 @@ struct hash_netportnet4_elem { static inline bool hash_netportnet4_data_equal(const struct hash_netportnet4_elem *ip1, - const struct hash_netportnet4_elem *ip2, - u32 *multi) + const struct hash_netportnet4_elem *ip2, + u32 *multi) { return ip1->ipcmp == ip2->ipcmp && ip1->ccmp == ip2->ccmp && @@ -91,7 +91,7 @@ hash_netportnet4_data_reset_flags(struct hash_netportnet4_elem *elem, u8 *flags) static inline void hash_netportnet4_data_reset_elem(struct hash_netportnet4_elem *elem, - struct hash_netportnet4_elem *orig) + struct hash_netportnet4_elem *orig) { elem->ip[1] = orig->ip[1]; } @@ -111,7 +111,7 @@ hash_netportnet4_data_netmask(struct hash_netportnet4_elem *elem, static bool hash_netportnet4_data_list(struct sk_buff *skb, - const struct hash_netportnet4_elem *data) + const struct hash_netportnet4_elem *data) { u32 flags = data->nomatch ? IPSET_FLAG_NOMATCH : 0; @@ -132,7 +132,7 @@ nla_put_failure: static inline void hash_netportnet4_data_next(struct hash_netportnet4_elem *next, - const struct hash_netportnet4_elem *d) + const struct hash_netportnet4_elem *d) { next->ipcmp = d->ipcmp; next->port = d->port; @@ -144,8 +144,8 @@ hash_netportnet4_data_next(struct hash_netportnet4_elem *next, static int hash_netportnet4_kadt(struct ip_set *set, const struct sk_buff *skb, - const struct xt_action_param *par, - enum ipset_adt adt, struct ip_set_adt_opt *opt) + const struct xt_action_param *par, + enum ipset_adt adt, struct ip_set_adt_opt *opt) { const struct hash_netportnet *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; @@ -171,7 +171,7 @@ hash_netportnet4_kadt(struct ip_set *set, const struct sk_buff *skb, static int hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[], - enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) + enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { const struct hash_netportnet *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; @@ -223,14 +223,16 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[], if (e.proto == 0) return -IPSET_ERR_INVALID_PROTO; - } else + } else { return -IPSET_ERR_MISSING_PROTO; + } if (!(with_ports || e.proto == IPPROTO_ICMP)) e.port = 0; if (tb[IPSET_ATTR_CADT_FLAGS]) { u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); + if (cadt_flags & IPSET_FLAG_NOMATCH) flags |= (IPSET_FLAG_NOMATCH << 16); } @@ -254,8 +256,9 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[], swap(ip, ip_to); if (unlikely(ip + UINT_MAX == ip_to)) return -IPSET_ERR_HASH_RANGE; - } else + } else { ip_set_mask_from_to(ip, ip_to, e.cidr[0]); + } port_to = port = ntohs(e.port); if (tb[IPSET_ATTR_PORT_TO]) { @@ -273,8 +276,9 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[], swap(ip2_from, ip2_to); if (unlikely(ip2_from + UINT_MAX == ip2_to)) return -IPSET_ERR_HASH_RANGE; - } else + } else { ip_set_mask_from_to(ip2_from, ip2_to, e.cidr[1]); + } if (retried) ip = ntohl(h->next.ip[0]); @@ -296,8 +300,8 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[], ret = adtfn(set, &e, &ext, &ext, flags); if (ret && !ip_set_eexist(ret, flags)) return ret; - else - ret = 0; + + ret = 0; ip2 = ip2_last + 1; } } @@ -324,8 +328,8 @@ struct hash_netportnet6_elem { static inline bool hash_netportnet6_data_equal(const struct hash_netportnet6_elem *ip1, - const struct 
hash_netportnet6_elem *ip2, - u32 *multi) + const struct hash_netportnet6_elem *ip2, + u32 *multi) { return ipv6_addr_equal(&ip1->ip[0].in6, &ip2->ip[0].in6) && ipv6_addr_equal(&ip1->ip[1].in6, &ip2->ip[1].in6) && @@ -354,7 +358,7 @@ hash_netportnet6_data_reset_flags(struct hash_netportnet6_elem *elem, u8 *flags) static inline void hash_netportnet6_data_reset_elem(struct hash_netportnet6_elem *elem, - struct hash_netportnet6_elem *orig) + struct hash_netportnet6_elem *orig) { elem->ip[1] = orig->ip[1]; } @@ -374,7 +378,7 @@ hash_netportnet6_data_netmask(struct hash_netportnet6_elem *elem, static bool hash_netportnet6_data_list(struct sk_buff *skb, - const struct hash_netportnet6_elem *data) + const struct hash_netportnet6_elem *data) { u32 flags = data->nomatch ? IPSET_FLAG_NOMATCH : 0; @@ -395,7 +399,7 @@ nla_put_failure: static inline void hash_netportnet6_data_next(struct hash_netportnet4_elem *next, - const struct hash_netportnet6_elem *d) + const struct hash_netportnet6_elem *d) { next->port = d->port; } @@ -410,8 +414,8 @@ hash_netportnet6_data_next(struct hash_netportnet4_elem *next, static int hash_netportnet6_kadt(struct ip_set *set, const struct sk_buff *skb, - const struct xt_action_param *par, - enum ipset_adt adt, struct ip_set_adt_opt *opt) + const struct xt_action_param *par, + enum ipset_adt adt, struct ip_set_adt_opt *opt) { const struct hash_netportnet *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; @@ -437,7 +441,7 @@ hash_netportnet6_kadt(struct ip_set *set, const struct sk_buff *skb, static int hash_netportnet6_uadt(struct ip_set *set, struct nlattr *tb[], - enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) + enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { const struct hash_netportnet *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; @@ -493,14 +497,16 @@ hash_netportnet6_uadt(struct ip_set *set, struct nlattr *tb[], if (e.proto == 0) return -IPSET_ERR_INVALID_PROTO; - } else + } else { return -IPSET_ERR_MISSING_PROTO; + } if (!(with_ports || e.proto == IPPROTO_ICMPV6)) e.port = 0; if (tb[IPSET_ATTR_CADT_FLAGS]) { u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); + if (cadt_flags & IPSET_FLAG_NOMATCH) flags |= (IPSET_FLAG_NOMATCH << 16); } @@ -524,8 +530,8 @@ hash_netportnet6_uadt(struct ip_set *set, struct nlattr *tb[], if (ret && !ip_set_eexist(ret, flags)) return ret; - else - ret = 0; + + ret = 0; } return ret; } diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c index 9f624ee9a41e..a1fe5377a2b3 100644 --- a/net/netfilter/ipset/ip_set_list_set.c +++ b/net/netfilter/ipset/ip_set_list_set.c @@ -206,14 +206,15 @@ list_set_utest(struct ip_set *set, void *value, const struct ip_set_ext *ext, continue; } - if (d->before == 0) + if (d->before == 0) { ret = 1; - else if (d->before > 0) { + } else if (d->before > 0) { next = list_next_entry(e, list); ret = !list_is_last(&e->list, &map->members) && next->id == d->refid; - } else + } else { ret = prev && prev->id == d->refid; + } return ret; } return 0; @@ -558,7 +559,7 @@ static const struct ip_set_type_variant set_variant = { static void list_set_gc(unsigned long ul_set) { - struct ip_set *set = (struct ip_set *) ul_set; + struct ip_set *set = (struct ip_set *)ul_set; struct list_set *map = set->data; spin_lock_bh(&set->lock); @@ -575,7 +576,7 @@ list_set_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set)) struct list_set *map = set->data; init_timer(&map->gc); - map->gc.data = (unsigned long) set; + map->gc.data = 
(unsigned long)set; map->gc.function = gc; map->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ; add_timer(&map->gc); diff --git a/net/netfilter/ipset/pfxlen.c b/net/netfilter/ipset/pfxlen.c index 04d15fdc99ee..1c8a42c1056c 100644 --- a/net/netfilter/ipset/pfxlen.c +++ b/net/netfilter/ipset/pfxlen.c @@ -1,9 +1,7 @@ #include #include -/* - * Prefixlen maps for fast conversions, by Jan Engelhardt. - */ +/* Prefixlen maps for fast conversions, by Jan Engelhardt. */ #define E(a, b, c, d) \ {.ip6 = { \ @@ -11,8 +9,7 @@ htonl(c), htonl(d), \ } } -/* - * This table works for both IPv4 and IPv6; +/* This table works for both IPv4 and IPv6; * just use prefixlen_netmask_map[prefixlength].ip. */ const union nf_inet_addr ip_set_netmask_map[] = { @@ -149,13 +146,12 @@ const union nf_inet_addr ip_set_netmask_map[] = { EXPORT_SYMBOL_GPL(ip_set_netmask_map); #undef E -#define E(a, b, c, d) \ - {.ip6 = { (__force __be32) a, (__force __be32) b, \ - (__force __be32) c, (__force __be32) d, \ +#define E(a, b, c, d) \ + {.ip6 = { (__force __be32)a, (__force __be32)b, \ + (__force __be32)c, (__force __be32)d, \ } } -/* - * This table works for both IPv4 and IPv6; +/* This table works for both IPv4 and IPv6; * just use prefixlen_hostmask_map[prefixlength].ip. */ const union nf_inet_addr ip_set_hostmask_map[] = { diff --git a/net/netfilter/xt_set.c b/net/netfilter/xt_set.c index b103e9627716..5669e5b453f4 100644 --- a/net/netfilter/xt_set.c +++ b/net/netfilter/xt_set.c @@ -9,7 +9,8 @@ */ /* Kernel module which implements the set match and SET target - * for netfilter/iptables. */ + * for netfilter/iptables. + */ #include #include @@ -53,6 +54,7 @@ static bool set_match_v0(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_set_info_match_v0 *info = par->matchinfo; + ADT_OPT(opt, par->family, info->match_set.u.compat.dim, info->match_set.u.compat.flags, 0, UINT_MAX); @@ -69,10 +71,10 @@ compat_flags(struct xt_set_info_v0 *info) info->u.compat.dim = IPSET_DIM_ZERO; if (info->u.flags[0] & IPSET_MATCH_INV) info->u.compat.flags |= IPSET_INV_MATCH; - for (i = 0; i < IPSET_DIM_MAX-1 && info->u.flags[i]; i++) { + for (i = 0; i < IPSET_DIM_MAX - 1 && info->u.flags[i]; i++) { info->u.compat.dim++; if (info->u.flags[i] & IPSET_SRC) - info->u.compat.flags |= (1<<info->u.compat.dim); + info->u.compat.flags |= (1 << info->u.compat.dim); } } @@ -89,7 +91,7 @@ set_match_v0_checkentry(const struct xt_mtchk_param *par) info->match_set.index); return -ENOENT; } - if (info->match_set.u.flags[IPSET_DIM_MAX-1] != 0) { + if (info->match_set.u.flags[IPSET_DIM_MAX - 1] != 0) { pr_warn("Protocol error: set match dimension is over the limit!\n"); ip_set_nfnl_put(par->net, info->match_set.index); return -ERANGE; } @@ -115,6 +117,7 @@ static bool set_match_v1(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_set_info_match_v1 *info = par->matchinfo; + ADT_OPT(opt, par->family, info->match_set.dim, info->match_set.flags, 0, UINT_MAX); @@ -179,9 +182,10 @@ static bool set_match_v3(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_set_info_match_v3 *info = par->matchinfo; + int ret; + ADT_OPT(opt, par->family, info->match_set.dim, info->match_set.flags, info->flags, UINT_MAX); - int ret; if (info->packets.op != IPSET_COUNTER_NONE || info->bytes.op != IPSET_COUNTER_NONE) @@ -225,9 +229,10 @@ static bool set_match_v4(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_set_info_match_v4 *info = par->matchinfo; + int ret; + ADT_OPT(opt, par->family,
info->match_set.dim, info->match_set.flags, info->flags, UINT_MAX); - int ret; if (info->packets.op != IPSET_COUNTER_NONE || info->bytes.op != IPSET_COUNTER_NONE) @@ -253,6 +258,7 @@ static unsigned int set_target_v0(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_set_info_target_v0 *info = par->targinfo; + ADT_OPT(add_opt, par->family, info->add_set.u.compat.dim, info->add_set.u.compat.flags, 0, UINT_MAX); ADT_OPT(del_opt, par->family, info->del_set.u.compat.dim, @@ -291,8 +297,8 @@ set_target_v0_checkentry(const struct xt_tgchk_param *par) return -ENOENT; } } - if (info->add_set.u.flags[IPSET_DIM_MAX-1] != 0 || - info->del_set.u.flags[IPSET_DIM_MAX-1] != 0) { + if (info->add_set.u.flags[IPSET_DIM_MAX - 1] != 0 || + info->del_set.u.flags[IPSET_DIM_MAX - 1] != 0) { pr_warn("Protocol error: SET target dimension is over the limit!\n"); if (info->add_set.index != IPSET_INVALID_ID) ip_set_nfnl_put(par->net, info->add_set.index); @@ -325,6 +331,7 @@ static unsigned int set_target_v1(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_set_info_target_v1 *info = par->targinfo; + ADT_OPT(add_opt, par->family, info->add_set.dim, info->add_set.flags, 0, UINT_MAX); ADT_OPT(del_opt, par->family, info->del_set.dim, @@ -393,6 +400,7 @@ static unsigned int set_target_v2(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_set_info_target_v2 *info = par->targinfo; + ADT_OPT(add_opt, par->family, info->add_set.dim, info->add_set.flags, info->flags, info->timeout); ADT_OPT(del_opt, par->family, info->del_set.dim, @@ -400,8 +408,8 @@ set_target_v2(struct sk_buff *skb, const struct xt_action_param *par) /* Normalize to fit into jiffies */ if (add_opt.ext.timeout != IPSET_NO_TIMEOUT && - add_opt.ext.timeout > UINT_MAX/MSEC_PER_SEC) - add_opt.ext.timeout = UINT_MAX/MSEC_PER_SEC; + add_opt.ext.timeout > UINT_MAX / MSEC_PER_SEC) + add_opt.ext.timeout = UINT_MAX / MSEC_PER_SEC; if (info->add_set.index != IPSET_INVALID_ID) ip_set_add(info->add_set.index, skb, par, &add_opt); if (info->del_set.index != IPSET_INVALID_ID) @@ -419,6 +427,8 @@ static unsigned int set_target_v3(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_set_info_target_v3 *info = par->targinfo; + int ret; + ADT_OPT(add_opt, par->family, info->add_set.dim, info->add_set.flags, info->flags, info->timeout); ADT_OPT(del_opt, par->family, info->del_set.dim, @@ -426,12 +436,10 @@ set_target_v3(struct sk_buff *skb, const struct xt_action_param *par) ADT_OPT(map_opt, par->family, info->map_set.dim, info->map_set.flags, 0, UINT_MAX); - int ret; - /* Normalize to fit into jiffies */ if (add_opt.ext.timeout != IPSET_NO_TIMEOUT && - add_opt.ext.timeout > UINT_MAX/MSEC_PER_SEC) - add_opt.ext.timeout = UINT_MAX/MSEC_PER_SEC; + add_opt.ext.timeout > UINT_MAX / MSEC_PER_SEC) + add_opt.ext.timeout = UINT_MAX / MSEC_PER_SEC; if (info->add_set.index != IPSET_INVALID_ID) ip_set_add(info->add_set.index, skb, par, &add_opt); if (info->del_set.index != IPSET_INVALID_ID) @@ -457,7 +465,6 @@ set_target_v3(struct sk_buff *skb, const struct xt_action_param *par) return XT_CONTINUE; } - static int set_target_v3_checkentry(const struct xt_tgchk_param *par) { @@ -497,8 +504,7 @@ set_target_v3_checkentry(const struct xt_tgchk_param *par) !(par->hook_mask & (1 << NF_INET_FORWARD | 1 << NF_INET_LOCAL_OUT | 1 << NF_INET_POST_ROUTING))) { - pr_warn("mapping of prio or/and queue is allowed only" - "from OUTPUT/FORWARD/POSTROUTING chains\n"); + pr_warn("mapping of prio or/and queue is allowed 
only from OUTPUT/FORWARD/POSTROUTING chains\n"); return -EINVAL; } index = ip_set_nfnl_get_byindex(par->net, @@ -519,8 +525,7 @@ set_target_v3_checkentry(const struct xt_tgchk_param *par) if (info->add_set.dim > IPSET_DIM_MAX || info->del_set.dim > IPSET_DIM_MAX || info->map_set.dim > IPSET_DIM_MAX) { - pr_warn("Protocol error: SET target dimension " - "is over the limit!\n"); + pr_warn("Protocol error: SET target dimension is over the limit!\n"); if (info->add_set.index != IPSET_INVALID_ID) ip_set_nfnl_put(par->net, info->add_set.index); if (info->del_set.index != IPSET_INVALID_ID) @@ -546,7 +551,6 @@ set_target_v3_destroy(const struct xt_tgdtor_param *par) ip_set_nfnl_put(par->net, info->map_set.index); } - static struct xt_match set_matches[] __read_mostly = { { .name = "set", -- cgit v1.2.3 From 2cbce139fc57bc2625f88add055d0b94f00c3352 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 12 Jun 2015 13:55:41 +0200 Subject: netfilter: nf_tables: attach net_device to basechain The device is part of the hook configuration, so instead of a global configuration per table, set it to each of the basechains that we create. This patch reworks ebddf1a8d78a ("netfilter: nf_tables: allow to bind table to net_device"). Note that this adds a dev_name field in the nft_base_chain structure which is required by the netdev notification subscription that follows up in a patch to handle gone net_devices. Suggested-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 4 +- include/uapi/linux/netfilter/nf_tables.h | 4 +- net/netfilter/nf_tables_api.c | 79 +++++++++++++++++--------------- 3 files changed, 46 insertions(+), 41 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 3d6f48ca40a7..09d6f8df60f0 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -791,6 +791,7 @@ struct nft_stats { * @policy: default policy * @stats: per-cpu chain stats * @chain: the chain + * @dev_name: device name that this base chain is attached to (if any) */ struct nft_base_chain { struct nf_hook_ops ops[NFT_HOOK_OPS_MAX]; @@ -799,6 +800,7 @@ struct nft_base_chain { u8 policy; struct nft_stats __percpu *stats; struct nft_chain chain; + char dev_name[IFNAMSIZ]; }; static inline struct nft_base_chain *nft_base_chain(const struct nft_chain *chain) @@ -819,7 +821,6 @@ unsigned int nft_do_chain(struct nft_pktinfo *pkt, * @use: number of chain references to this table * @flags: table flag (see enum nft_table_flags) * @name: name of the table - * @dev: this table is bound to this device (if any) */ struct nft_table { struct list_head list; @@ -829,7 +830,6 @@ struct nft_table { u32 use; u16 flags; char name[NFT_TABLE_MAXNAMELEN]; - struct net_device *dev; }; enum nft_af_flags { diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 89a671e0f5e7..a99e6a997140 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -122,11 +122,13 @@ enum nft_list_attributes { * * @NFTA_HOOK_HOOKNUM: netfilter hook number (NLA_U32) * @NFTA_HOOK_PRIORITY: netfilter hook priority (NLA_U32) + * @NFTA_HOOK_DEV: netdevice name (NLA_STRING) */ enum nft_hook_attributes { NFTA_HOOK_UNSPEC, NFTA_HOOK_HOOKNUM, NFTA_HOOK_PRIORITY, + NFTA_HOOK_DEV, __NFTA_HOOK_MAX }; #define NFTA_HOOK_MAX (__NFTA_HOOK_MAX - 1) @@ -146,14 +148,12 @@ enum nft_table_flags { * @NFTA_TABLE_NAME: name of the table (NLA_STRING) *
@NFTA_TABLE_FLAGS: bitmask of enum nft_table_flags (NLA_U32) * @NFTA_TABLE_USE: number of chains in this table (NLA_U32) - * @NFTA_TABLE_DEV: net device name (NLA_STRING) */ enum nft_table_attributes { NFTA_TABLE_UNSPEC, NFTA_TABLE_NAME, NFTA_TABLE_FLAGS, NFTA_TABLE_USE, - NFTA_TABLE_DEV, __NFTA_TABLE_MAX }; #define NFTA_TABLE_MAX (__NFTA_TABLE_MAX - 1) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 4528f122bcd2..900c81a2f89a 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -399,8 +399,6 @@ static const struct nla_policy nft_table_policy[NFTA_TABLE_MAX + 1] = { [NFTA_TABLE_NAME] = { .type = NLA_STRING, .len = NFT_TABLE_MAXNAMELEN - 1 }, [NFTA_TABLE_FLAGS] = { .type = NLA_U32 }, - [NFTA_TABLE_DEV] = { .type = NLA_STRING, - .len = IFNAMSIZ - 1 }, }; static int nf_tables_fill_table_info(struct sk_buff *skb, struct net *net, @@ -425,10 +423,6 @@ static int nf_tables_fill_table_info(struct sk_buff *skb, struct net *net, nla_put_be32(skb, NFTA_TABLE_USE, htonl(table->use))) goto nla_put_failure; - if (table->dev && - nla_put_string(skb, NFTA_TABLE_DEV, table->dev->name)) - goto nla_put_failure; - nlmsg_end(skb, nlh); return 0; @@ -614,11 +608,6 @@ static int nf_tables_updtable(struct nft_ctx *ctx) if (flags == ctx->table->flags) return 0; - if ((ctx->afi->flags & NFT_AF_NEEDS_DEV) && - ctx->nla[NFTA_TABLE_DEV] && - nla_strcmp(ctx->nla[NFTA_TABLE_DEV], ctx->table->dev->name)) - return -EOPNOTSUPP; - trans = nft_trans_alloc(ctx, NFT_MSG_NEWTABLE, sizeof(struct nft_trans_table)); if (trans == NULL) @@ -656,7 +645,6 @@ static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb, struct nft_table *table; struct net *net = sock_net(skb->sk); int family = nfmsg->nfgen_family; - struct net_device *dev = NULL; u32 flags = 0; struct nft_ctx ctx; int err; @@ -691,20 +679,6 @@ static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb, return -EINVAL; } - if (afi->flags & NFT_AF_NEEDS_DEV) { - char ifname[IFNAMSIZ]; - - if (!nla[NFTA_TABLE_DEV]) - return -EOPNOTSUPP; - - nla_strlcpy(ifname, nla[NFTA_TABLE_DEV], IFNAMSIZ); - dev = dev_get_by_name(net, ifname); - if (!dev) - return -ENOENT; - } else if (nla[NFTA_TABLE_DEV]) { - return -EOPNOTSUPP; - } - err = -EAFNOSUPPORT; if (!try_module_get(afi->owner)) goto err1; @@ -718,7 +692,6 @@ static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb, INIT_LIST_HEAD(&table->chains); INIT_LIST_HEAD(&table->sets); table->flags = flags; - table->dev = dev; nft_ctx_init(&ctx, skb, nlh, afi, table, NULL, nla); err = nft_trans_table_add(&ctx, NFT_MSG_NEWTABLE); @@ -732,9 +705,6 @@ err3: err2: module_put(afi->owner); err1: - if (dev != NULL) - dev_put(dev); - return err; } @@ -838,9 +808,6 @@ static void nf_tables_table_destroy(struct nft_ctx *ctx) { BUG_ON(ctx->table->use > 0); - if (ctx->table->dev) - dev_put(ctx->table->dev); - kfree(ctx->table); module_put(ctx->afi->owner); } @@ -916,6 +883,8 @@ static const struct nla_policy nft_chain_policy[NFTA_CHAIN_MAX + 1] = { static const struct nla_policy nft_hook_policy[NFTA_HOOK_MAX + 1] = { [NFTA_HOOK_HOOKNUM] = { .type = NLA_U32 }, [NFTA_HOOK_PRIORITY] = { .type = NLA_U32 }, + [NFTA_HOOK_DEV] = { .type = NLA_STRING, + .len = IFNAMSIZ - 1 }, }; static int nft_dump_stats(struct sk_buff *skb, struct nft_stats __percpu *stats) @@ -989,6 +958,9 @@ static int nf_tables_fill_chain_info(struct sk_buff *skb, struct net *net, goto nla_put_failure; if (nla_put_be32(skb, NFTA_HOOK_PRIORITY, htonl(ops->priority))) goto nla_put_failure; + if 
(basechain->dev_name[0] && + nla_put_string(skb, NFTA_HOOK_DEV, basechain->dev_name)) + goto nla_put_failure; nla_nest_end(skb, nest); if (nla_put_be32(skb, NFTA_CHAIN_POLICY, @@ -1200,9 +1172,13 @@ static void nf_tables_chain_destroy(struct nft_chain *chain) BUG_ON(chain->use > 0); if (chain->flags & NFT_BASE_CHAIN) { - module_put(nft_base_chain(chain)->type->owner); - free_percpu(nft_base_chain(chain)->stats); - kfree(nft_base_chain(chain)); + struct nft_base_chain *basechain = nft_base_chain(chain); + + module_put(basechain->type->owner); + free_percpu(basechain->stats); + if (basechain->ops[0].dev != NULL) + dev_put(basechain->ops[0].dev); + kfree(basechain); } else { kfree(chain); } @@ -1221,6 +1197,7 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, struct nlattr *ha[NFTA_HOOK_MAX + 1]; struct net *net = sock_net(skb->sk); int family = nfmsg->nfgen_family; + struct net_device *dev = NULL; u8 policy = NF_ACCEPT; u64 handle = 0; unsigned int i; @@ -1360,17 +1337,43 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, return -ENOENT; hookfn = type->hooks[hooknum]; + if (afi->flags & NFT_AF_NEEDS_DEV) { + char ifname[IFNAMSIZ]; + + if (!ha[NFTA_HOOK_DEV]) { + module_put(type->owner); + return -EOPNOTSUPP; + } + + nla_strlcpy(ifname, ha[NFTA_HOOK_DEV], IFNAMSIZ); + dev = dev_get_by_name(net, ifname); + if (!dev) { + module_put(type->owner); + return -ENOENT; + } + } else if (ha[NFTA_HOOK_DEV]) { + module_put(type->owner); + return -EOPNOTSUPP; + } + basechain = kzalloc(sizeof(*basechain), GFP_KERNEL); if (basechain == NULL) { module_put(type->owner); + if (dev != NULL) + dev_put(dev); return -ENOMEM; } + if (dev != NULL) + strncpy(basechain->dev_name, dev->name, IFNAMSIZ); + if (nla[NFTA_CHAIN_COUNTERS]) { stats = nft_stats_alloc(nla[NFTA_CHAIN_COUNTERS]); if (IS_ERR(stats)) { module_put(type->owner); kfree(basechain); + if (dev != NULL) + dev_put(dev); return PTR_ERR(stats); } basechain->stats = stats; @@ -1379,6 +1382,8 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, if (stats == NULL) { module_put(type->owner); kfree(basechain); + if (dev != NULL) + dev_put(dev); return -ENOMEM; } rcu_assign_pointer(basechain->stats, stats); @@ -1396,7 +1401,7 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, ops->priority = priority; ops->priv = chain; ops->hook = afi->hooks[ops->hooknum]; - ops->dev = table->dev; + ops->dev = dev; if (hookfn) ops->hook = hookfn; if (afi->hook_ops_init) -- cgit v1.2.3 From ffeedafbf0236f03aeb2e8db273b3e5ae5f5bc89 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Fri, 12 Jun 2015 19:39:12 -0700 Subject: bpf: introduce current->pid, tgid, uid, gid, comm accessors eBPF programs attached to kprobes need to filter based on current->pid, uid and other fields, so introduce helper functions: u64 bpf_get_current_pid_tgid(void) Return: current->tgid << 32 | current->pid u64 bpf_get_current_uid_gid(void) Return: current_gid << 32 | current_uid bpf_get_current_comm(char *buf, int size_of_buf) stores current->comm into buf They can be used from the programs attached to TC as well to classify packets based on current task fields. Update tracex2 example to print histogram of write syscalls for each process instead of aggregated for all. Signed-off-by: Alexei Starovoitov Signed-off-by: David S. 
Miller --- include/linux/bpf.h | 3 +++ include/uapi/linux/bpf.h | 19 +++++++++++++ kernel/bpf/core.c | 3 +++ kernel/bpf/helpers.c | 58 +++++++++++++++++++++++++++++++++++++++ kernel/trace/bpf_trace.c | 6 +++++ net/core/filter.c | 6 +++++ samples/bpf/bpf_helpers.h | 6 +++++ samples/bpf/tracex2_kern.c | 24 +++++++++++++---- samples/bpf/tracex2_user.c | 67 +++++++++++++++++++++++++++++++++++++++------- 9 files changed, 178 insertions(+), 14 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 2235aee8096a..1b9a3f5b27f6 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -188,5 +188,8 @@ extern const struct bpf_func_proto bpf_get_prandom_u32_proto; extern const struct bpf_func_proto bpf_get_smp_processor_id_proto; extern const struct bpf_func_proto bpf_tail_call_proto; extern const struct bpf_func_proto bpf_ktime_get_ns_proto; +extern const struct bpf_func_proto bpf_get_current_pid_tgid_proto; +extern const struct bpf_func_proto bpf_get_current_uid_gid_proto; +extern const struct bpf_func_proto bpf_get_current_comm_proto; #endif /* _LINUX_BPF_H */ diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 602f05b7a275..29ef6f99e43d 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -230,6 +230,25 @@ enum bpf_func_id { * Return: 0 on success */ BPF_FUNC_clone_redirect, + + /** + * u64 bpf_get_current_pid_tgid(void) + * Return: current->tgid << 32 | current->pid + */ + BPF_FUNC_get_current_pid_tgid, + + /** + * u64 bpf_get_current_uid_gid(void) + * Return: current_gid << 32 | current_uid + */ + BPF_FUNC_get_current_uid_gid, + + /** + * bpf_get_current_comm(char *buf, int size_of_buf) + * stores current->comm into buf + * Return: 0 on success + */ + BPF_FUNC_get_current_comm, __BPF_FUNC_MAX_ID, }; diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 1e00aa3316dc..1fc45cc83076 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -730,6 +730,9 @@ const struct bpf_func_proto bpf_map_delete_elem_proto __weak; const struct bpf_func_proto bpf_get_prandom_u32_proto __weak; const struct bpf_func_proto bpf_get_smp_processor_id_proto __weak; const struct bpf_func_proto bpf_ktime_get_ns_proto __weak; +const struct bpf_func_proto bpf_get_current_pid_tgid_proto __weak; +const struct bpf_func_proto bpf_get_current_uid_gid_proto __weak; +const struct bpf_func_proto bpf_get_current_comm_proto __weak; /* Always built-in helper functions. 
*/ const struct bpf_func_proto bpf_tail_call_proto = { diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 7ad5d8842d5b..1447ec09421e 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -14,6 +14,8 @@ #include #include #include +#include +#include /* If kernel subsystem is allowing eBPF programs to call this function, * inside its own verifier_ops->get_func_proto() callback it should return @@ -124,3 +126,59 @@ const struct bpf_func_proto bpf_ktime_get_ns_proto = { .gpl_only = true, .ret_type = RET_INTEGER, }; + +static u64 bpf_get_current_pid_tgid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +{ + struct task_struct *task = current; + + if (!task) + return -EINVAL; + + return (u64) task->tgid << 32 | task->pid; +} + +const struct bpf_func_proto bpf_get_current_pid_tgid_proto = { + .func = bpf_get_current_pid_tgid, + .gpl_only = false, + .ret_type = RET_INTEGER, +}; + +static u64 bpf_get_current_uid_gid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +{ + struct task_struct *task = current; + kuid_t uid; + kgid_t gid; + + if (!task) + return -EINVAL; + + current_uid_gid(&uid, &gid); + return (u64) from_kgid(&init_user_ns, gid) << 32 | + from_kuid(&init_user_ns, uid); +} + +const struct bpf_func_proto bpf_get_current_uid_gid_proto = { + .func = bpf_get_current_uid_gid, + .gpl_only = false, + .ret_type = RET_INTEGER, +}; + +static u64 bpf_get_current_comm(u64 r1, u64 size, u64 r3, u64 r4, u64 r5) +{ + struct task_struct *task = current; + char *buf = (char *) (long) r1; + + if (!task) + return -EINVAL; + + memcpy(buf, task->comm, min_t(size_t, size, sizeof(task->comm))); + return 0; +} + +const struct bpf_func_proto bpf_get_current_comm_proto = { + .func = bpf_get_current_comm, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_STACK, + .arg2_type = ARG_CONST_STACK_SIZE, +}; diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 50c4015a8ad3..3a17638cdf46 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -162,6 +162,12 @@ static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func return &bpf_ktime_get_ns_proto; case BPF_FUNC_tail_call: return &bpf_tail_call_proto; + case BPF_FUNC_get_current_pid_tgid: + return &bpf_get_current_pid_tgid_proto; + case BPF_FUNC_get_current_uid_gid: + return &bpf_get_current_uid_gid_proto; + case BPF_FUNC_get_current_comm: + return &bpf_get_current_comm_proto; case BPF_FUNC_trace_printk: /* diff --git a/net/core/filter.c b/net/core/filter.c index d271c06bf01f..20aa51ccbf9d 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1459,6 +1459,12 @@ tc_cls_act_func_proto(enum bpf_func_id func_id) return &bpf_l4_csum_replace_proto; case BPF_FUNC_clone_redirect: return &bpf_clone_redirect_proto; + case BPF_FUNC_get_current_pid_tgid: + return &bpf_get_current_pid_tgid_proto; + case BPF_FUNC_get_current_uid_gid: + return &bpf_get_current_uid_gid_proto; + case BPF_FUNC_get_current_comm: + return &bpf_get_current_comm_proto; default: return sk_filter_func_proto(func_id); } diff --git a/samples/bpf/bpf_helpers.h b/samples/bpf/bpf_helpers.h index f531a0b3282d..bdf1c1607b80 100644 --- a/samples/bpf/bpf_helpers.h +++ b/samples/bpf/bpf_helpers.h @@ -25,6 +25,12 @@ static void (*bpf_tail_call)(void *ctx, void *map, int index) = (void *) BPF_FUNC_tail_call; static unsigned long long (*bpf_get_smp_processor_id)(void) = (void *) BPF_FUNC_get_smp_processor_id; +static unsigned long long (*bpf_get_current_pid_tgid)(void) = + (void *) BPF_FUNC_get_current_pid_tgid; +static unsigned long 
long (*bpf_get_current_uid_gid)(void) = + (void *) BPF_FUNC_get_current_uid_gid; +static int (*bpf_get_current_comm)(void *buf, int buf_size) = + (void *) BPF_FUNC_get_current_comm; /* llvm builtin functions that eBPF C program may use to * emit BPF_LD_ABS and BPF_LD_IND instructions diff --git a/samples/bpf/tracex2_kern.c b/samples/bpf/tracex2_kern.c index 19ec1cfc45db..dc50f4f2943f 100644 --- a/samples/bpf/tracex2_kern.c +++ b/samples/bpf/tracex2_kern.c @@ -62,11 +62,18 @@ static unsigned int log2l(unsigned long v) return log2(v); } +struct hist_key { + char comm[16]; + u64 pid_tgid; + u64 uid_gid; + u32 index; +}; + struct bpf_map_def SEC("maps") my_hist_map = { - .type = BPF_MAP_TYPE_ARRAY, - .key_size = sizeof(u32), + .type = BPF_MAP_TYPE_HASH, + .key_size = sizeof(struct hist_key), .value_size = sizeof(long), - .max_entries = 64, + .max_entries = 1024, }; SEC("kprobe/sys_write") @@ -75,11 +82,18 @@ int bpf_prog3(struct pt_regs *ctx) long write_size = ctx->dx; /* arg3 */ long init_val = 1; long *value; - u32 index = log2l(write_size); + struct hist_key key = {}; + + key.index = log2l(write_size); + key.pid_tgid = bpf_get_current_pid_tgid(); + key.uid_gid = bpf_get_current_uid_gid(); + bpf_get_current_comm(&key.comm, sizeof(key.comm)); - value = bpf_map_lookup_elem(&my_hist_map, &index); + value = bpf_map_lookup_elem(&my_hist_map, &key); if (value) __sync_fetch_and_add(value, 1); + else + bpf_map_update_elem(&my_hist_map, &key, &init_val, BPF_ANY); return 0; } char _license[] SEC("license") = "GPL"; diff --git a/samples/bpf/tracex2_user.c b/samples/bpf/tracex2_user.c index 91b8d0896fbb..cd0241c1447a 100644 --- a/samples/bpf/tracex2_user.c +++ b/samples/bpf/tracex2_user.c @@ -3,6 +3,7 @@ #include #include #include +#include #include "libbpf.h" #include "bpf_load.h" @@ -20,23 +21,42 @@ static void stars(char *str, long val, long max, int width) str[i] = '\0'; } -static void print_hist(int fd) +struct task { + char comm[16]; + __u64 pid_tgid; + __u64 uid_gid; +}; + +struct hist_key { + struct task t; + __u32 index; +}; + +#define SIZE sizeof(struct task) + +static void print_hist_for_pid(int fd, void *task) { - int key; + struct hist_key key = {}, next_key; + char starstr[MAX_STARS]; long value; long data[MAX_INDEX] = {}; - char starstr[MAX_STARS]; - int i; int max_ind = -1; long max_value = 0; + int i, ind; - for (key = 0; key < MAX_INDEX; key++) { - bpf_lookup_elem(fd, &key, &value); - data[key] = value; - if (value && key > max_ind) - max_ind = key; + while (bpf_get_next_key(fd, &key, &next_key) == 0) { + if (memcmp(&next_key, task, SIZE)) { + key = next_key; + continue; + } + bpf_lookup_elem(fd, &next_key, &value); + ind = next_key.index; + data[ind] = value; + if (value && ind > max_ind) + max_ind = ind; if (value > max_value) max_value = value; + key = next_key; } printf(" syscall write() stats\n"); @@ -48,6 +68,35 @@ static void print_hist(int fd) MAX_STARS, starstr); } } + +static void print_hist(int fd) +{ + struct hist_key key = {}, next_key; + static struct task tasks[1024]; + int task_cnt = 0; + int i; + + while (bpf_get_next_key(fd, &key, &next_key) == 0) { + int found = 0; + + for (i = 0; i < task_cnt; i++) + if (memcmp(&tasks[i], &next_key, SIZE) == 0) + found = 1; + if (!found) + memcpy(&tasks[task_cnt++], &next_key, SIZE); + key = next_key; + } + + for (i = 0; i < task_cnt; i++) { + printf("\npid %d cmd %s uid %d\n", + (__u32) tasks[i].pid_tgid, + tasks[i].comm, + (__u32) tasks[i].uid_gid); + print_hist_for_pid(fd, &tasks[i]); + } + +} + static void int_exit(int sig) { 
print_hist(map_fd[1]); -- cgit v1.2.3 From 254cb6dbfd8894743fbf814ec856ccd0874af691 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Sun, 14 Jun 2015 16:36:34 +0300 Subject: bonding: export slave's actor_oper_port_state via sysfs and netlink Export the actor_oper_port_state of each port via sysfs and netlink. In 802.3ad mode it is valuable for the user to be able to check the actor_oper state, it is already exported via bond's proc entry. Signed-off-by: Nikolay Aleksandrov Signed-off-by: Andy Gospodarek Signed-off-by: David S. Miller --- drivers/net/bonding/bond_netlink.c | 10 +++++++++- drivers/net/bonding/bond_sysfs_slave.c | 16 ++++++++++++++++ include/uapi/linux/if_link.h | 1 + 3 files changed, 26 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/drivers/net/bonding/bond_netlink.c b/drivers/net/bonding/bond_netlink.c index f7015eb4f8db..a0e600db4236 100644 --- a/drivers/net/bonding/bond_netlink.c +++ b/drivers/net/bonding/bond_netlink.c @@ -28,6 +28,7 @@ static size_t bond_get_slave_size(const struct net_device *bond_dev, nla_total_size(MAX_ADDR_LEN) + /* IFLA_BOND_SLAVE_PERM_HWADDR */ nla_total_size(sizeof(u16)) + /* IFLA_BOND_SLAVE_QUEUE_ID */ nla_total_size(sizeof(u16)) + /* IFLA_BOND_SLAVE_AD_AGGREGATOR_ID */ + nla_total_size(sizeof(u8)) + /* IFLA_BOND_SLAVE_AD_ACTOR_OPER_PORT_STATE */ 0; } @@ -56,12 +57,19 @@ static int bond_fill_slave_info(struct sk_buff *skb, if (BOND_MODE(slave->bond) == BOND_MODE_8023AD) { const struct aggregator *agg; + const struct port *ad_port; + ad_port = &SLAVE_AD_INFO(slave)->port; agg = SLAVE_AD_INFO(slave)->port.aggregator; - if (agg) + if (agg) { if (nla_put_u16(skb, IFLA_BOND_SLAVE_AD_AGGREGATOR_ID, agg->aggregator_identifier)) goto nla_put_failure; + if (nla_put_u8(skb, + IFLA_BOND_SLAVE_AD_ACTOR_OPER_PORT_STATE, + ad_port->actor_oper_port_state)) + goto nla_put_failure; + } } return 0; diff --git a/drivers/net/bonding/bond_sysfs_slave.c b/drivers/net/bonding/bond_sysfs_slave.c index 23618a831612..f6c197cee669 100644 --- a/drivers/net/bonding/bond_sysfs_slave.c +++ b/drivers/net/bonding/bond_sysfs_slave.c @@ -80,6 +80,21 @@ static ssize_t ad_aggregator_id_show(struct slave *slave, char *buf) } static SLAVE_ATTR_RO(ad_aggregator_id); +static ssize_t ad_actor_oper_port_state_show(struct slave *slave, char *buf) +{ + const struct port *ad_port; + + if (BOND_MODE(slave->bond) == BOND_MODE_8023AD) { + ad_port = &SLAVE_AD_INFO(slave)->port; + if (ad_port->aggregator) + return sprintf(buf, "%u\n", + ad_port->actor_oper_port_state); + } + + return sprintf(buf, "N/A\n"); +} +static SLAVE_ATTR_RO(ad_actor_oper_port_state); + static const struct slave_attribute *slave_attrs[] = { &slave_attr_state, &slave_attr_mii_status, @@ -87,6 +102,7 @@ static const struct slave_attribute *slave_attrs[] = { &slave_attr_perm_hwaddr, &slave_attr_queue_id, &slave_attr_ad_aggregator_id, + &slave_attr_ad_actor_oper_port_state, NULL }; diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 1737b7a8272b..1b3e357223f2 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -456,6 +456,7 @@ enum { IFLA_BOND_SLAVE_PERM_HWADDR, IFLA_BOND_SLAVE_QUEUE_ID, IFLA_BOND_SLAVE_AD_AGGREGATOR_ID, + IFLA_BOND_SLAVE_AD_ACTOR_OPER_PORT_STATE, __IFLA_BOND_SLAVE_MAX, }; -- cgit v1.2.3 From 46ea297ed67cdeeb0142244873458b911037d0ba Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Sun, 14 Jun 2015 16:36:35 +0300 Subject: bonding: export slave's partner_oper_port_state via sysfs and netlink Export the 
partner_oper_port_state of each port via sysfs and netlink. In 802.3ad mode it is valuable for the user to be able to check the partner_oper state, it is already exported via bond's proc entry. Signed-off-by: Nikolay Aleksandrov Signed-off-by: Andy Gospodarek Signed-off-by: David S. Miller --- drivers/net/bonding/bond_netlink.c | 5 +++++ drivers/net/bonding/bond_sysfs_slave.c | 16 ++++++++++++++++ include/uapi/linux/if_link.h | 1 + 3 files changed, 22 insertions(+) (limited to 'include/uapi/linux') diff --git a/drivers/net/bonding/bond_netlink.c b/drivers/net/bonding/bond_netlink.c index a0e600db4236..5580fcde738f 100644 --- a/drivers/net/bonding/bond_netlink.c +++ b/drivers/net/bonding/bond_netlink.c @@ -29,6 +29,7 @@ static size_t bond_get_slave_size(const struct net_device *bond_dev, nla_total_size(sizeof(u16)) + /* IFLA_BOND_SLAVE_QUEUE_ID */ nla_total_size(sizeof(u16)) + /* IFLA_BOND_SLAVE_AD_AGGREGATOR_ID */ nla_total_size(sizeof(u8)) + /* IFLA_BOND_SLAVE_AD_ACTOR_OPER_PORT_STATE */ + nla_total_size(sizeof(u16)) + /* IFLA_BOND_SLAVE_AD_PARTNER_OPER_PORT_STATE */ 0; } @@ -69,6 +70,10 @@ static int bond_fill_slave_info(struct sk_buff *skb, IFLA_BOND_SLAVE_AD_ACTOR_OPER_PORT_STATE, ad_port->actor_oper_port_state)) goto nla_put_failure; + if (nla_put_u16(skb, + IFLA_BOND_SLAVE_AD_PARTNER_OPER_PORT_STATE, + ad_port->partner_oper.port_state)) + goto nla_put_failure; } } diff --git a/drivers/net/bonding/bond_sysfs_slave.c b/drivers/net/bonding/bond_sysfs_slave.c index f6c197cee669..7d16c51e6913 100644 --- a/drivers/net/bonding/bond_sysfs_slave.c +++ b/drivers/net/bonding/bond_sysfs_slave.c @@ -95,6 +95,21 @@ static ssize_t ad_actor_oper_port_state_show(struct slave *slave, char *buf) } static SLAVE_ATTR_RO(ad_actor_oper_port_state); +static ssize_t ad_partner_oper_port_state_show(struct slave *slave, char *buf) +{ + const struct port *ad_port; + + if (BOND_MODE(slave->bond) == BOND_MODE_8023AD) { + ad_port = &SLAVE_AD_INFO(slave)->port; + if (ad_port->aggregator) + return sprintf(buf, "%u\n", + ad_port->partner_oper.port_state); + } + + return sprintf(buf, "N/A\n"); +} +static SLAVE_ATTR_RO(ad_partner_oper_port_state); + static const struct slave_attribute *slave_attrs[] = { &slave_attr_state, &slave_attr_mii_status, @@ -103,6 +118,7 @@ static const struct slave_attribute *slave_attrs[] = { &slave_attr_queue_id, &slave_attr_ad_aggregator_id, &slave_attr_ad_actor_oper_port_state, + &slave_attr_ad_partner_oper_port_state, NULL }; diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 1b3e357223f2..510efb360580 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -457,6 +457,7 @@ enum { IFLA_BOND_SLAVE_QUEUE_ID, IFLA_BOND_SLAVE_AD_AGGREGATOR_ID, IFLA_BOND_SLAVE_AD_ACTOR_OPER_PORT_STATE, + IFLA_BOND_SLAVE_AD_PARTNER_OPER_PORT_STATE, __IFLA_BOND_SLAVE_MAX, }; -- cgit v1.2.3 From 3b766cd832328fcb87db3507e7b98cf42f21689d Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Mon, 15 Jun 2015 17:59:07 +0300 Subject: net/core: Add reading VF statistics through the PF netdevice Add ndo_get_vf_stats where the PF retrieves and fills the VFs traffic statistics. We encode the VF stats in a nested manner to allow for future extensions. Signed-off-by: Eran Ben Elisha Signed-off-by: Hadar Hen Zion Signed-off-by: Or Gerlitz Signed-off-by: David S. 
Miller --- include/linux/if_link.h | 9 ++++++++ include/linux/netdevice.h | 4 ++++ include/uapi/linux/if_link.h | 13 +++++++++++ net/core/rtnetlink.c | 51 ++++++++++++++++++++++++++++++++++++++++++-- 4 files changed, 75 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/linux/if_link.h b/include/linux/if_link.h index da4929927f69..ae5d0d22955d 100644 --- a/include/linux/if_link.h +++ b/include/linux/if_link.h @@ -5,6 +5,15 @@ /* We don't want this structure exposed to user space */ +struct ifla_vf_stats { + __u64 rx_packets; + __u64 tx_packets; + __u64 rx_bytes; + __u64 tx_bytes; + __u64 broadcast; + __u64 multicast; +}; + struct ifla_vf_info { __u32 vf; __u8 mac[32]; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 6f5f71ff5169..e20979dfd6a9 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1100,6 +1100,10 @@ struct net_device_ops { struct ifla_vf_info *ivf); int (*ndo_set_vf_link_state)(struct net_device *dev, int vf, int link_state); + int (*ndo_get_vf_stats)(struct net_device *dev, + int vf, + struct ifla_vf_stats + *vf_stats); int (*ndo_set_vf_port)(struct net_device *dev, int vf, struct nlattr *port[]); diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 510efb360580..2c7e8e3d3981 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -484,6 +484,7 @@ enum { IFLA_VF_RSS_QUERY_EN, /* RSS Redirection Table and Hash Key query * on/off switch */ + IFLA_VF_STATS, /* network device statistics */ __IFLA_VF_MAX, }; @@ -533,6 +534,18 @@ struct ifla_vf_rss_query_en { __u32 setting; }; +enum { + IFLA_VF_STATS_RX_PACKETS, + IFLA_VF_STATS_TX_PACKETS, + IFLA_VF_STATS_RX_BYTES, + IFLA_VF_STATS_TX_BYTES, + IFLA_VF_STATS_BROADCAST, + IFLA_VF_STATS_MULTICAST, + __IFLA_VF_STATS_MAX, +}; + +#define IFLA_VF_STATS_MAX (__IFLA_VF_STATS_MAX - 1) + /* VF ports management section * * Nested layout of set/get msg is: diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 077b6d280371..2d102ce1474f 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -819,7 +819,19 @@ static inline int rtnl_vfinfo_size(const struct net_device *dev, nla_total_size(sizeof(struct ifla_vf_spoofchk)) + nla_total_size(sizeof(struct ifla_vf_rate)) + nla_total_size(sizeof(struct ifla_vf_link_state)) + - nla_total_size(sizeof(struct ifla_vf_rss_query_en))); + nla_total_size(sizeof(struct ifla_vf_rss_query_en)) + + /* IFLA_VF_STATS_RX_PACKETS */ + nla_total_size(sizeof(__u64)) + + /* IFLA_VF_STATS_TX_PACKETS */ + nla_total_size(sizeof(__u64)) + + /* IFLA_VF_STATS_RX_BYTES */ + nla_total_size(sizeof(__u64)) + + /* IFLA_VF_STATS_TX_BYTES */ + nla_total_size(sizeof(__u64)) + + /* IFLA_VF_STATS_BROADCAST */ + nla_total_size(sizeof(__u64)) + + /* IFLA_VF_STATS_MULTICAST */ + nla_total_size(sizeof(__u64))); return size; } else return 0; @@ -1123,7 +1135,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, && (ext_filter_mask & RTEXT_FILTER_VF)) { int i; - struct nlattr *vfinfo, *vf; + struct nlattr *vfinfo, *vf, *vfstats; int num_vfs = dev_num_vf(dev->dev.parent); vfinfo = nla_nest_start(skb, IFLA_VFINFO_LIST); @@ -1138,6 +1150,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, struct ifla_vf_spoofchk vf_spoofchk; struct ifla_vf_link_state vf_linkstate; struct ifla_vf_rss_query_en vf_rss_query_en; + struct ifla_vf_stats vf_stats; /* * Not all SR-IOV capable drivers support the @@ -1190,6 +1203,30 @@ static int rtnl_fill_ifinfo(struct sk_buff 
*skb, struct net_device *dev, sizeof(vf_rss_query_en), &vf_rss_query_en)) goto nla_put_failure; + memset(&vf_stats, 0, sizeof(vf_stats)); + if (dev->netdev_ops->ndo_get_vf_stats) + dev->netdev_ops->ndo_get_vf_stats(dev, i, + &vf_stats); + vfstats = nla_nest_start(skb, IFLA_VF_STATS); + if (!vfstats) { + nla_nest_cancel(skb, vf); + nla_nest_cancel(skb, vfinfo); + goto nla_put_failure; + } + if (nla_put_u64(skb, IFLA_VF_STATS_RX_PACKETS, + vf_stats.rx_packets) || + nla_put_u64(skb, IFLA_VF_STATS_TX_PACKETS, + vf_stats.tx_packets) || + nla_put_u64(skb, IFLA_VF_STATS_RX_BYTES, + vf_stats.rx_bytes) || + nla_put_u64(skb, IFLA_VF_STATS_TX_BYTES, + vf_stats.tx_bytes) || + nla_put_u64(skb, IFLA_VF_STATS_BROADCAST, + vf_stats.broadcast) || + nla_put_u64(skb, IFLA_VF_STATS_MULTICAST, + vf_stats.multicast)) + goto nla_put_failure; + nla_nest_end(skb, vfstats); nla_nest_end(skb, vf); } nla_nest_end(skb, vfinfo); @@ -1303,6 +1340,16 @@ static const struct nla_policy ifla_vf_policy[IFLA_VF_MAX+1] = { [IFLA_VF_RATE] = { .len = sizeof(struct ifla_vf_rate) }, [IFLA_VF_LINK_STATE] = { .len = sizeof(struct ifla_vf_link_state) }, [IFLA_VF_RSS_QUERY_EN] = { .len = sizeof(struct ifla_vf_rss_query_en) }, + [IFLA_VF_STATS] = { .type = NLA_NESTED }, +}; + +static const struct nla_policy ifla_vf_stats_policy[IFLA_VF_STATS_MAX + 1] = { + [IFLA_VF_STATS_RX_PACKETS] = { .type = NLA_U64 }, + [IFLA_VF_STATS_TX_PACKETS] = { .type = NLA_U64 }, + [IFLA_VF_STATS_RX_BYTES] = { .type = NLA_U64 }, + [IFLA_VF_STATS_TX_BYTES] = { .type = NLA_U64 }, + [IFLA_VF_STATS_BROADCAST] = { .type = NLA_U64 }, + [IFLA_VF_STATS_MULTICAST] = { .type = NLA_U64 }, }; static const struct nla_policy ifla_port_policy[IFLA_PORT_MAX+1] = { -- cgit v1.2.3 From eb4cb008529ca08e0d8c0fa54e8f739520197a65 Mon Sep 17 00:00:00 2001 From: Craig Gallek Date: Mon, 15 Jun 2015 11:26:18 -0400 Subject: sock_diag: define destruction multicast groups These groups will contain socket-destruction events for AF_INET/AF_INET6, IPPROTO_TCP/IPPROTO_UDP. Near the end of socket destruction, a check for listeners is performed. In the presence of a listener, rather than completely cleanup the socket, a unit of work will be added to a private work queue which will first broadcast information about the socket and then finish the cleanup operation. Signed-off-by: Craig Gallek Acked-by: Eric Dumazet Signed-off-by: David S. 
Miller --- include/linux/sock_diag.h | 42 +++++++++++++++++++++ include/net/sock.h | 1 + include/uapi/linux/sock_diag.h | 10 +++++ net/core/sock.c | 11 +++++- net/core/sock_diag.c | 85 ++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 148 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/linux/sock_diag.h b/include/linux/sock_diag.h index 083ac388098e..fddebc617469 100644 --- a/include/linux/sock_diag.h +++ b/include/linux/sock_diag.h @@ -1,7 +1,10 @@ #ifndef __SOCK_DIAG_H__ #define __SOCK_DIAG_H__ +#include #include +#include +#include #include struct sk_buff; @@ -11,6 +14,7 @@ struct sock; struct sock_diag_handler { __u8 family; int (*dump)(struct sk_buff *skb, struct nlmsghdr *nlh); + int (*get_info)(struct sk_buff *skb, struct sock *sk); }; int sock_diag_register(const struct sock_diag_handler *h); @@ -26,4 +30,42 @@ int sock_diag_put_meminfo(struct sock *sk, struct sk_buff *skb, int attr); int sock_diag_put_filterinfo(bool may_report_filterinfo, struct sock *sk, struct sk_buff *skb, int attrtype); +static inline +enum sknetlink_groups sock_diag_destroy_group(const struct sock *sk) +{ + switch (sk->sk_family) { + case AF_INET: + switch (sk->sk_protocol) { + case IPPROTO_TCP: + return SKNLGRP_INET_TCP_DESTROY; + case IPPROTO_UDP: + return SKNLGRP_INET_UDP_DESTROY; + default: + return SKNLGRP_NONE; + } + case AF_INET6: + switch (sk->sk_protocol) { + case IPPROTO_TCP: + return SKNLGRP_INET6_TCP_DESTROY; + case IPPROTO_UDP: + return SKNLGRP_INET6_UDP_DESTROY; + default: + return SKNLGRP_NONE; + } + default: + return SKNLGRP_NONE; + } +} + +static inline +bool sock_diag_has_destroy_listeners(const struct sock *sk) +{ + const struct net *n = sock_net(sk); + const enum sknetlink_groups group = sock_diag_destroy_group(sk); + + return group != SKNLGRP_NONE && n->diag_nlsk && + netlink_has_listeners(n->diag_nlsk, group); +} +void sock_diag_broadcast_destroy(struct sock *sk); + #endif diff --git a/include/net/sock.h b/include/net/sock.h index 26c1c3171e00..3e8258699270 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1518,6 +1518,7 @@ static inline void unlock_sock_fast(struct sock *sk, bool slow) struct sock *sk_alloc(struct net *net, int family, gfp_t priority, struct proto *prot, int kern); void sk_free(struct sock *sk); +void sk_destruct(struct sock *sk); struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority); struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force, diff --git a/include/uapi/linux/sock_diag.h b/include/uapi/linux/sock_diag.h index b00e29efb161..49230d36f9ce 100644 --- a/include/uapi/linux/sock_diag.h +++ b/include/uapi/linux/sock_diag.h @@ -23,4 +23,14 @@ enum { SK_MEMINFO_VARS, }; +enum sknetlink_groups { + SKNLGRP_NONE, + SKNLGRP_INET_TCP_DESTROY, + SKNLGRP_INET_UDP_DESTROY, + SKNLGRP_INET6_TCP_DESTROY, + SKNLGRP_INET6_UDP_DESTROY, + __SKNLGRP_MAX, +}; +#define SKNLGRP_MAX (__SKNLGRP_MAX - 1) + #endif /* _UAPI__SOCK_DIAG_H__ */ diff --git a/net/core/sock.c b/net/core/sock.c index 7063c329c1b6..1e1fe9a68d83 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -131,6 +131,7 @@ #include #include #include +#include #include @@ -1423,7 +1424,7 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority, } EXPORT_SYMBOL(sk_alloc); -static void __sk_free(struct sock *sk) +void sk_destruct(struct sock *sk) { struct sk_filter *filter; @@ -1451,6 +1452,14 @@ static void __sk_free(struct sock *sk) sk_prot_free(sk->sk_prot_creator, sk); } +static void __sk_free(struct sock *sk) +{ + 
if (unlikely(sock_diag_has_destroy_listeners(sk))) + sock_diag_broadcast_destroy(sk); + else + sk_destruct(sk); +} + void sk_free(struct sock *sk) { /* diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c index 74dddf84adcd..d79866c5f8bc 100644 --- a/net/core/sock_diag.c +++ b/net/core/sock_diag.c @@ -5,6 +5,9 @@ #include #include #include +#include +#include +#include #include #include @@ -12,6 +15,7 @@ static const struct sock_diag_handler *sock_diag_handlers[AF_MAX]; static int (*inet_rcv_compat)(struct sk_buff *skb, struct nlmsghdr *nlh); static DEFINE_MUTEX(sock_diag_table_mutex); +static struct workqueue_struct *broadcast_wq; static u64 sock_gen_cookie(struct sock *sk) { @@ -101,6 +105,62 @@ out: } EXPORT_SYMBOL(sock_diag_put_filterinfo); +struct broadcast_sk { + struct sock *sk; + struct work_struct work; +}; + +static size_t sock_diag_nlmsg_size(void) +{ + return NLMSG_ALIGN(sizeof(struct inet_diag_msg) + + nla_total_size(sizeof(u8)) /* INET_DIAG_PROTOCOL */ + + nla_total_size(sizeof(struct tcp_info))); /* INET_DIAG_INFO */ +} + +static void sock_diag_broadcast_destroy_work(struct work_struct *work) +{ + struct broadcast_sk *bsk = + container_of(work, struct broadcast_sk, work); + struct sock *sk = bsk->sk; + const struct sock_diag_handler *hndl; + struct sk_buff *skb; + const enum sknetlink_groups group = sock_diag_destroy_group(sk); + int err = -1; + + WARN_ON(group == SKNLGRP_NONE); + + skb = nlmsg_new(sock_diag_nlmsg_size(), GFP_KERNEL); + if (!skb) + goto out; + + mutex_lock(&sock_diag_table_mutex); + hndl = sock_diag_handlers[sk->sk_family]; + if (hndl && hndl->get_info) + err = hndl->get_info(skb, sk); + mutex_unlock(&sock_diag_table_mutex); + + if (!err) + nlmsg_multicast(sock_net(sk)->diag_nlsk, skb, 0, group, + GFP_KERNEL); + else + kfree_skb(skb); +out: + sk_destruct(sk); + kfree(bsk); +} + +void sock_diag_broadcast_destroy(struct sock *sk) +{ + /* Note, this function is often called from an interrupt context. 
*/ + struct broadcast_sk *bsk = + kmalloc(sizeof(struct broadcast_sk), GFP_ATOMIC); + if (!bsk) + return sk_destruct(sk); + bsk->sk = sk; + INIT_WORK(&bsk->work, sock_diag_broadcast_destroy_work); + queue_work(broadcast_wq, &bsk->work); +} + void sock_diag_register_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh)) { mutex_lock(&sock_diag_table_mutex); @@ -211,10 +271,32 @@ static void sock_diag_rcv(struct sk_buff *skb) mutex_unlock(&sock_diag_mutex); } +static int sock_diag_bind(struct net *net, int group) +{ + switch (group) { + case SKNLGRP_INET_TCP_DESTROY: + case SKNLGRP_INET_UDP_DESTROY: + if (!sock_diag_handlers[AF_INET]) + request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK, + NETLINK_SOCK_DIAG, AF_INET); + break; + case SKNLGRP_INET6_TCP_DESTROY: + case SKNLGRP_INET6_UDP_DESTROY: + if (!sock_diag_handlers[AF_INET6]) + request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK, + NETLINK_SOCK_DIAG, AF_INET); + break; + } + return 0; +} + static int __net_init diag_net_init(struct net *net) { struct netlink_kernel_cfg cfg = { + .groups = SKNLGRP_MAX, .input = sock_diag_rcv, + .bind = sock_diag_bind, + .flags = NL_CFG_F_NONROOT_RECV, }; net->diag_nlsk = netlink_kernel_create(net, NETLINK_SOCK_DIAG, &cfg); @@ -234,12 +316,15 @@ static struct pernet_operations diag_net_ops = { static int __init sock_diag_init(void) { + broadcast_wq = alloc_workqueue("sock_diag_events", 0, 0); + BUG_ON(!broadcast_wq); return register_pernet_subsys(&diag_net_ops); } static void __exit sock_diag_exit(void) { unregister_pernet_subsys(&diag_net_ops); + destroy_workqueue(broadcast_wq); } module_init(sock_diag_init); -- cgit v1.2.3 From 35ac838a9b96470f999db04320f53a2033642bfb Mon Sep 17 00:00:00 2001 From: Craig Gallek Date: Mon, 15 Jun 2015 11:26:20 -0400 Subject: sock_diag: implement a get_info handler for inet This get_info handler will simply dispatch to the appropriate existing inet protocol handler. This patch also includes a new netlink attribute (INET_DIAG_PROTOCOL). This attribute is currently only used for multicast messages. Without this attribute, there is no way of knowing the IP protocol used by the socket information being broadcast. This attribute is not necessary in the 'dump' variant of this protocol (though it could easily be added) because dump requests are issued for specific family/protocol pairs. Tested: ss -E (note, the -E option has not yet been merged into the upstream version of ss). Signed-off-by: Craig Gallek Acked-by: Eric Dumazet Signed-off-by: David S. 
Miller --- include/uapi/linux/inet_diag.h | 3 ++- net/ipv4/inet_diag.c | 46 ++++++++++++++++++++++++++++++++++++++++++ net/ipv4/tcp.c | 4 +++- net/ipv4/tcp_diag.c | 5 +++-- 4 files changed, 54 insertions(+), 4 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/inet_diag.h b/include/uapi/linux/inet_diag.h index c7093c75bdd6..b629fc53b109 100644 --- a/include/uapi/linux/inet_diag.h +++ b/include/uapi/linux/inet_diag.h @@ -111,9 +111,10 @@ enum { INET_DIAG_SKMEMINFO, INET_DIAG_SHUTDOWN, INET_DIAG_DCTCPINFO, + INET_DIAG_PROTOCOL, /* response attribute only */ }; -#define INET_DIAG_MAX INET_DIAG_DCTCPINFO +#define INET_DIAG_MAX INET_DIAG_PROTOCOL /* INET_DIAG_MEM */ diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index b1f01174bf32..21985d8d41e7 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -1078,14 +1078,60 @@ static int inet_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h) return inet_diag_get_exact(skb, h, nlmsg_data(h)); } +static +int inet_diag_handler_get_info(struct sk_buff *skb, struct sock *sk) +{ + const struct inet_diag_handler *handler; + struct nlmsghdr *nlh; + struct nlattr *attr; + struct inet_diag_msg *r; + void *info = NULL; + int err = 0; + + nlh = nlmsg_put(skb, 0, 0, SOCK_DIAG_BY_FAMILY, sizeof(*r), 0); + if (!nlh) + return -ENOMEM; + + r = nlmsg_data(nlh); + memset(r, 0, sizeof(*r)); + inet_diag_msg_common_fill(r, sk); + r->idiag_state = sk->sk_state; + + if ((err = nla_put_u8(skb, INET_DIAG_PROTOCOL, sk->sk_protocol))) { + nlmsg_cancel(skb, nlh); + return err; + } + + handler = inet_diag_lock_handler(sk->sk_protocol); + if (IS_ERR(handler)) { + inet_diag_unlock_handler(handler); + nlmsg_cancel(skb, nlh); + return PTR_ERR(handler); + } + + attr = handler->idiag_info_size + ? nla_reserve(skb, INET_DIAG_INFO, handler->idiag_info_size) + : NULL; + if (attr) + info = nla_data(attr); + + handler->idiag_get_info(sk, r, info); + inet_diag_unlock_handler(handler); + + nlmsg_end(skb, nlh); + return 0; +} + static const struct sock_diag_handler inet_diag_handler = { .family = AF_INET, .dump = inet_diag_handler_dump, + .get_info = inet_diag_handler_get_info, }; static const struct sock_diag_handler inet6_diag_handler = { .family = AF_INET6, .dump = inet_diag_handler_dump, + .get_info = inet_diag_handler_get_info, }; int inet_diag_register(const struct inet_diag_handler *h) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 65f791f74845..697b86dd45b3 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2624,13 +2624,15 @@ EXPORT_SYMBOL(compat_tcp_setsockopt); /* Return information about state of tcp endpoint in API format. 
*/ void tcp_get_info(struct sock *sk, struct tcp_info *info) { - const struct tcp_sock *tp = tcp_sk(sk); + const struct tcp_sock *tp = tcp_sk(sk); /* iff sk_type == SOCK_STREAM */ const struct inet_connection_sock *icsk = inet_csk(sk); u32 now = tcp_time_stamp; unsigned int start; u32 rate; memset(info, 0, sizeof(*info)); + if (sk->sk_type != SOCK_STREAM) + return; info->tcpi_state = sk->sk_state; info->tcpi_ca_state = icsk->icsk_ca_state; diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c index 423e3881a40b..479f34946177 100644 --- a/net/ipv4/tcp_diag.c +++ b/net/ipv4/tcp_diag.c @@ -19,13 +19,14 @@ static void tcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r, void *_info) { - const struct tcp_sock *tp = tcp_sk(sk); struct tcp_info *info = _info; if (sk->sk_state == TCP_LISTEN) { r->idiag_rqueue = sk->sk_ack_backlog; r->idiag_wqueue = sk->sk_max_ack_backlog; - } else { + } else if (sk->sk_type == SOCK_STREAM) { + const struct tcp_sock *tp = tcp_sk(sk); + r->idiag_rqueue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0); r->idiag_wqueue = tp->write_seq - tp->snd_una; } -- cgit v1.2.3 From ea2c6d9745c6698d9f820bc230aa1a80d9e908ac Mon Sep 17 00:00:00 2001 From: Tiejun Chen Date: Mon, 4 May 2015 10:48:49 +0800 Subject: kvm: remove one useless check extension We already check KVM_CAP_IRQFD in generic once enable CONFIG_HAVE_KVM_IRQFD, kvm_vm_ioctl_check_extension_generic() | + switch (arg) { + ... + #ifdef CONFIG_HAVE_KVM_IRQFD + case KVM_CAP_IRQFD: + #endif + ... + return 1; + ... + } | + kvm_vm_ioctl_check_extension() So its not necessary to check this in arch again, and also fix one typo, s/emlation/emulation. Signed-off-by: Tiejun Chen Acked-by: Paolo Bonzini Signed-off-by: Christoffer Dall --- arch/arm/kvm/arm.c | 1 - include/uapi/linux/kvm.h | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index e41cb11f71b2..7e8233015ad8 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -171,7 +171,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) int r; switch (ext) { case KVM_CAP_IRQCHIP: - case KVM_CAP_IRQFD: case KVM_CAP_IOEVENTFD: case KVM_CAP_DEVICE_CTRL: case KVM_CAP_USER_MEMORY: diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 75bd9f7fd846..cbfd1acdeda7 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -895,7 +895,7 @@ struct kvm_xen_hvm_config { * * KVM_IRQFD_FLAG_RESAMPLE indicates resamplefd is valid and specifies * the irqfd to operate in resampling mode for level triggered interrupt - * emlation. See Documentation/virtual/kvm/api.txt. + * emulation. See Documentation/virtual/kvm/api.txt. */ #define KVM_IRQFD_FLAG_RESAMPLE (1 << 1) -- cgit v1.2.3 From ef493bd930ae482c608c5999b40d79dda3f2a674 Mon Sep 17 00:00:00 2001 From: Roman Kubiak Date: Fri, 12 Jun 2015 12:32:57 +0200 Subject: netfilter: nfnetlink_queue: add security context information This patch adds an additional attribute when sending packet information via netlink in netfilter_queue module. It will send additional security context data, so that userspace applications can verify this context against their own security databases. 
Signed-off-by: Roman Kubiak Acked-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nfnetlink_queue.h | 4 ++- net/netfilter/nfnetlink_queue_core.c | 35 +++++++++++++++++++++++++- 2 files changed, 37 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/nfnetlink_queue.h b/include/uapi/linux/netfilter/nfnetlink_queue.h index 8dd819e2b5fe..b67a853638ff 100644 --- a/include/uapi/linux/netfilter/nfnetlink_queue.h +++ b/include/uapi/linux/netfilter/nfnetlink_queue.h @@ -49,6 +49,7 @@ enum nfqnl_attr_type { NFQA_EXP, /* nf_conntrack_netlink.h */ NFQA_UID, /* __u32 sk uid */ NFQA_GID, /* __u32 sk gid */ + NFQA_SECCTX, /* security context string */ __NFQA_MAX }; @@ -102,7 +103,8 @@ enum nfqnl_attr_config { #define NFQA_CFG_F_CONNTRACK (1 << 1) #define NFQA_CFG_F_GSO (1 << 2) #define NFQA_CFG_F_UID_GID (1 << 3) -#define NFQA_CFG_F_MAX (1 << 4) +#define NFQA_CFG_F_SECCTX (1 << 4) +#define NFQA_CFG_F_MAX (1 << 5) /* flags for NFQA_SKB_INFO */ /* packet appears to have wrong checksums, but they are ok */ diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c index 22a5ac76683e..6eccf0fcdc63 100644 --- a/net/netfilter/nfnetlink_queue_core.c +++ b/net/netfilter/nfnetlink_queue_core.c @@ -278,6 +278,23 @@ nla_put_failure: return -1; } +static u32 nfqnl_get_sk_secctx(struct sk_buff *skb, char **secdata) +{ + u32 seclen = 0; +#if IS_ENABLED(CONFIG_NETWORK_SECMARK) + if (!skb || !sk_fullsock(skb->sk)) + return 0; + + read_lock_bh(&skb->sk->sk_callback_lock); + + if (skb->secmark) + security_secid_to_secctx(skb->secmark, secdata, &seclen); + + read_unlock_bh(&skb->sk->sk_callback_lock); +#endif + return seclen; +} + static struct sk_buff * nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, struct nf_queue_entry *entry, @@ -297,6 +314,8 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, struct nf_conn *ct = NULL; enum ip_conntrack_info uninitialized_var(ctinfo); bool csum_verify; + char *secdata = NULL; + u32 seclen = 0; size = nlmsg_total_size(sizeof(struct nfgenmsg)) + nla_total_size(sizeof(struct nfqnl_msg_packet_hdr)) @@ -352,6 +371,12 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, + nla_total_size(sizeof(u_int32_t))); /* gid */ } + if ((queue->flags & NFQA_CFG_F_SECCTX) && entskb->sk) { + seclen = nfqnl_get_sk_secctx(entskb, &secdata); + if (seclen) + size += nla_total_size(seclen); + } + skb = nfnetlink_alloc_skb(net, size, queue->peer_portid, GFP_ATOMIC); if (!skb) { @@ -479,6 +504,9 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, nfqnl_put_sk_uidgid(skb, entskb->sk) < 0) goto nla_put_failure; + if (seclen && nla_put(skb, NFQA_SECCTX, seclen, secdata)) + goto nla_put_failure; + if (ct && nfqnl_ct_put(skb, ct, ctinfo) < 0) goto nla_put_failure; @@ -1142,7 +1170,12 @@ nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb, ret = -EOPNOTSUPP; goto err_out_unlock; } - +#if !IS_ENABLED(CONFIG_NETWORK_SECMARK) + if (flags & mask & NFQA_CFG_F_SECCTX) { + ret = -EOPNOTSUPP; + goto err_out_unlock; + } +#endif spin_lock_bh(&queue->lock); queue->flags &= ~mask; queue->flags |= flags & mask; -- cgit v1.2.3 From 01555e74bde51444c6898ef1800fb2bc697d479e Mon Sep 17 00:00:00 2001 From: Harout Hedeshian Date: Mon, 15 Jun 2015 18:40:43 -0600 Subject: netfilter: xt_socket: add XT_SOCKET_RESTORESKMARK flag xt_socket is useful for matching sockets with IP_TRANSPARENT and taking some action on 
the matching packets. However, it lacks the ability to match only a small subset of transparent sockets. Suppose there are 2 applications, each with its own set of transparent sockets. The first application wants all matching packets dropped, while the second application wants them forwarded somewhere else. Add the ability to retore the skb->mark from the sk_mark. The mark is only restored if a matching socket is found and the transparent / nowildcard conditions are satisfied. Now the 2 hypothetical applications can differentiate their sockets based on a mark value set with SO_MARK. iptables -t mangle -I PREROUTING -m socket --transparent \ --restore-skmark -j action iptables -t mangle -A action -m mark --mark 10 -j action2 iptables -t mangle -A action -m mark --mark 11 -j action3 Signed-off-by: Harout Hedeshian Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/xt_socket.h | 8 +++++ net/netfilter/xt_socket.c | 59 ++++++++++++++++++++++++++++---- 2 files changed, 61 insertions(+), 6 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/xt_socket.h b/include/uapi/linux/netfilter/xt_socket.h index 6315e2ac3474..87644f832494 100644 --- a/include/uapi/linux/netfilter/xt_socket.h +++ b/include/uapi/linux/netfilter/xt_socket.h @@ -6,6 +6,7 @@ enum { XT_SOCKET_TRANSPARENT = 1 << 0, XT_SOCKET_NOWILDCARD = 1 << 1, + XT_SOCKET_RESTORESKMARK = 1 << 2, }; struct xt_socket_mtinfo1 { @@ -18,4 +19,11 @@ struct xt_socket_mtinfo2 { }; #define XT_SOCKET_FLAGS_V2 (XT_SOCKET_TRANSPARENT | XT_SOCKET_NOWILDCARD) +struct xt_socket_mtinfo3 { + __u8 flags; +}; +#define XT_SOCKET_FLAGS_V3 (XT_SOCKET_TRANSPARENT \ + | XT_SOCKET_NOWILDCARD \ + | XT_SOCKET_RESTORESKMARK) + #endif /* _XT_SOCKET_H */ diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c index e092cb046326..43e26c881100 100644 --- a/net/netfilter/xt_socket.c +++ b/net/netfilter/xt_socket.c @@ -205,6 +205,7 @@ static bool socket_match(const struct sk_buff *skb, struct xt_action_param *par, const struct xt_socket_mtinfo1 *info) { + struct sk_buff *pskb = (struct sk_buff *)skb; struct sock *sk = skb->sk; if (!sk) @@ -226,6 +227,10 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par, if (info->flags & XT_SOCKET_TRANSPARENT) transparent = xt_socket_sk_is_transparent(sk); + if (info->flags & XT_SOCKET_RESTORESKMARK && !wildcard && + transparent) + pskb->mark = sk->sk_mark; + if (sk != skb->sk) sock_gen_put(sk); @@ -247,7 +252,7 @@ socket_mt4_v0(const struct sk_buff *skb, struct xt_action_param *par) } static bool -socket_mt4_v1_v2(const struct sk_buff *skb, struct xt_action_param *par) +socket_mt4_v1_v2_v3(const struct sk_buff *skb, struct xt_action_param *par) { return socket_match(skb, par, par->matchinfo); } @@ -371,9 +376,10 @@ static struct sock *xt_socket_lookup_slow_v6(const struct sk_buff *skb, } static bool -socket_mt6_v1_v2(const struct sk_buff *skb, struct xt_action_param *par) +socket_mt6_v1_v2_v3(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_socket_mtinfo1 *info = (struct xt_socket_mtinfo1 *) par->matchinfo; + struct sk_buff *pskb = (struct sk_buff *)skb; struct sock *sk = skb->sk; if (!sk) @@ -395,6 +401,10 @@ socket_mt6_v1_v2(const struct sk_buff *skb, struct xt_action_param *par) if (info->flags & XT_SOCKET_TRANSPARENT) transparent = xt_socket_sk_is_transparent(sk); + if (info->flags & XT_SOCKET_RESTORESKMARK && !wildcard && + transparent) + pskb->mark = sk->sk_mark; + if (sk != skb->sk) sock_gen_put(sk); @@ -428,6 +438,19 @@ static int 
socket_mt_v2_check(const struct xt_mtchk_param *par) return 0; } +static int socket_mt_v3_check(const struct xt_mtchk_param *par) +{ + const struct xt_socket_mtinfo3 *info = + (struct xt_socket_mtinfo3 *)par->matchinfo; + + if (info->flags & ~XT_SOCKET_FLAGS_V3) { + pr_info("unknown flags 0x%x\n", + info->flags & ~XT_SOCKET_FLAGS_V3); + return -EINVAL; + } + return 0; +} + static struct xt_match socket_mt_reg[] __read_mostly = { { .name = "socket", @@ -442,7 +465,7 @@ static struct xt_match socket_mt_reg[] __read_mostly = { .name = "socket", .revision = 1, .family = NFPROTO_IPV4, - .match = socket_mt4_v1_v2, + .match = socket_mt4_v1_v2_v3, .checkentry = socket_mt_v1_check, .matchsize = sizeof(struct xt_socket_mtinfo1), .hooks = (1 << NF_INET_PRE_ROUTING) | @@ -454,7 +477,7 @@ static struct xt_match socket_mt_reg[] __read_mostly = { .name = "socket", .revision = 1, .family = NFPROTO_IPV6, - .match = socket_mt6_v1_v2, + .match = socket_mt6_v1_v2_v3, .checkentry = socket_mt_v1_check, .matchsize = sizeof(struct xt_socket_mtinfo1), .hooks = (1 << NF_INET_PRE_ROUTING) | @@ -466,7 +489,7 @@ static struct xt_match socket_mt_reg[] __read_mostly = { .name = "socket", .revision = 2, .family = NFPROTO_IPV4, - .match = socket_mt4_v1_v2, + .match = socket_mt4_v1_v2_v3, .checkentry = socket_mt_v2_check, .matchsize = sizeof(struct xt_socket_mtinfo1), .hooks = (1 << NF_INET_PRE_ROUTING) | @@ -478,13 +501,37 @@ static struct xt_match socket_mt_reg[] __read_mostly = { .name = "socket", .revision = 2, .family = NFPROTO_IPV6, - .match = socket_mt6_v1_v2, + .match = socket_mt6_v1_v2_v3, .checkentry = socket_mt_v2_check, .matchsize = sizeof(struct xt_socket_mtinfo1), .hooks = (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_IN), .me = THIS_MODULE, }, +#endif + { + .name = "socket", + .revision = 3, + .family = NFPROTO_IPV4, + .match = socket_mt4_v1_v2_v3, + .checkentry = socket_mt_v3_check, + .matchsize = sizeof(struct xt_socket_mtinfo1), + .hooks = (1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_LOCAL_IN), + .me = THIS_MODULE, + }, +#ifdef XT_SOCKET_HAVE_IPV6 + { + .name = "socket", + .revision = 3, + .family = NFPROTO_IPV6, + .match = socket_mt6_v1_v2_v3, + .checkentry = socket_mt_v3_check, + .matchsize = sizeof(struct xt_socket_mtinfo1), + .hooks = (1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_LOCAL_IN), + .me = THIS_MODULE, + }, #endif }; -- cgit v1.2.3 From f8d5556fa9dbf6b88e1a8fe88e47ad1b8ddb4742 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Mon, 15 Jun 2015 10:52:40 -0300 Subject: [media] videodev2.h: fix copy-and-paste error in V4L2_MAP_XFER_FUNC_DEFAULT The colorspace argument was compared against a V4L2_XFER_FUNC define instead of against a V4L2_COLORSPACE define, returning the wrong answer. Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/videodev2.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index 3d5fc72d53a7..3228fbebcd63 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -270,7 +270,7 @@ enum v4l2_xfer_func { * This depends on the colorspace. */ #define V4L2_MAP_XFER_FUNC_DEFAULT(colsp) \ - ((colsp) == V4L2_XFER_FUNC_ADOBERGB ? V4L2_XFER_FUNC_ADOBERGB : \ + ((colsp) == V4L2_COLORSPACE_ADOBERGB ? V4L2_XFER_FUNC_ADOBERGB : \ ((colsp) == V4L2_COLORSPACE_SMPTE240M ? V4L2_XFER_FUNC_SMPTE240M : \ ((colsp) == V4L2_COLORSPACE_RAW ? 
V4L2_XFER_FUNC_NONE : \ ((colsp) == V4L2_COLORSPACE_SRGB || (colsp) == V4L2_COLORSPACE_JPEG ? \ -- cgit v1.2.3 From a263653ed798216c0069922d7b5237ca49436007 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 17 Jun 2015 10:28:27 -0500 Subject: netfilter: don't pull include/linux/netfilter.h from netns headers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This pulls the full hook netfilter definitions from all those that include net_namespace.h. Instead let's just include the bare minimum required in the new linux/netfilter_defs.h file, and use it from the netfilter netns header files. I also needed to include in.h and in6.h from linux/netfilter.h otherwise we hit this compilation error: In file included from include/linux/netfilter_defs.h:4:0, from include/net/netns/netfilter.h:4, from include/net/net_namespace.h:22, from include/linux/netdevice.h:43, from net/netfilter/nfnetlink_queue_core.c:23: include/uapi/linux/netfilter.h:76:17: error: field ‘in’ has incomplete type struct in_addr in; And also explicit include linux/netfilter.h in several spots. Signed-off-by: Pablo Neira Ayuso Signed-off-by: Eric W. Biederman --- include/linux/netfilter.h | 6 ++---- include/linux/netfilter_defs.h | 9 +++++++++ include/net/netns/netfilter.h | 2 +- include/net/netns/x_tables.h | 2 +- include/uapi/linux/netfilter.h | 3 ++- net/ipv6/output_core.c | 1 + 6 files changed, 16 insertions(+), 7 deletions(-) create mode 100644 include/linux/netfilter_defs.h (limited to 'include/uapi/linux') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index f5ff5d156da8..00050dfd9f23 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -10,7 +10,8 @@ #include #include #include -#include +#include + #ifdef CONFIG_NETFILTER static inline int NF_DROP_GETERR(int verdict) { @@ -38,9 +39,6 @@ static inline void nf_inet_addr_mask(const union nf_inet_addr *a1, int netfilter_init(void); -/* Largest hook number + 1 */ -#define NF_MAX_HOOKS 8 - struct sk_buff; struct nf_hook_ops; diff --git a/include/linux/netfilter_defs.h b/include/linux/netfilter_defs.h new file mode 100644 index 000000000000..d3a7f8597e82 --- /dev/null +++ b/include/linux/netfilter_defs.h @@ -0,0 +1,9 @@ +#ifndef __LINUX_NETFILTER_CORE_H_ +#define __LINUX_NETFILTER_CORE_H_ + +#include + +/* Largest hook number + 1, see uapi/linux/netfilter_decnet.h */ +#define NF_MAX_HOOKS 8 + +#endif diff --git a/include/net/netns/netfilter.h b/include/net/netns/netfilter.h index cf25b5e35f3c..532e4ba64f49 100644 --- a/include/net/netns/netfilter.h +++ b/include/net/netns/netfilter.h @@ -1,7 +1,7 @@ #ifndef __NETNS_NETFILTER_H #define __NETNS_NETFILTER_H -#include +#include struct proc_dir_entry; struct nf_logger; diff --git a/include/net/netns/x_tables.h b/include/net/netns/x_tables.h index 4d6597ad6067..c8a7681efa6a 100644 --- a/include/net/netns/x_tables.h +++ b/include/net/netns/x_tables.h @@ -2,7 +2,7 @@ #define __NETNS_X_TABLES_H #include -#include +#include struct ebt_table; diff --git a/include/uapi/linux/netfilter.h b/include/uapi/linux/netfilter.h index 177027cce6b3..d93f949d1d9a 100644 --- a/include/uapi/linux/netfilter.h +++ b/include/uapi/linux/netfilter.h @@ -4,7 +4,8 @@ #include #include #include - +#include +#include /* Responses from hook functions. 
*/ #define NF_DROP 0 diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c index 21678acd4521..928a0fb0b744 100644 --- a/net/ipv6/output_core.c +++ b/net/ipv6/output_core.c @@ -8,6 +8,7 @@ #include #include #include +#include static u32 __ipv6_select_ident(struct net *net, u32 hashrnd, const struct in6_addr *dst, -- cgit v1.2.3 From 930e6fcd2bcce9bcd9d4aa7e755678d33f3fe6f4 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Wed, 17 Jun 2015 09:51:10 -0400 Subject: perf tools: Add time out to force stop proc map processing System wide sampling like 'perf top' or 'perf record -a' read all threads /proc/xxx/maps before sampling. If there are any threads which generating a keeping growing huge maps, perf will do infinite loop during synthesizing. Nothing will be sampled. This patch fixes this issue by adding per-thread timeout to force stop this kind of endless proc map processing. PERF_RECORD_MISC_PROC_MAP_PARSE_TIME_OUT is introduced to indicate that the mmap record are truncated by time out. User will get warning notification when truncated mmap records are detected. Reported-by: Ying Huang Signed-off-by: Kan Liang Cc: Andi Kleen Cc: David Ahern Cc: Ying Huang Link: http://lkml.kernel.org/r/1434549071-25611-1-git-send-email-kan.liang@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- include/uapi/linux/perf_event.h | 4 ++++ tools/perf/util/event.c | 18 ++++++++++++++++++ tools/perf/util/event.h | 1 + tools/perf/util/session.c | 11 +++++++++++ 4 files changed, 34 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 613ed9ad588f..d97f84c080da 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -565,6 +565,10 @@ struct perf_event_mmap_page { #define PERF_RECORD_MISC_GUEST_KERNEL (4 << 0) #define PERF_RECORD_MISC_GUEST_USER (5 << 0) +/* + * Indicates that /proc/PID/maps parsing are truncated by time out. + */ +#define PERF_RECORD_MISC_PROC_MAP_PARSE_TIMEOUT (1 << 12) /* * PERF_RECORD_MISC_MMAP_DATA and PERF_RECORD_MISC_COMM_EXEC are used on * different events so can reuse the same bit position. diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 793b1503d437..416ba80c628f 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -213,6 +213,8 @@ static int perf_event__synthesize_fork(struct perf_tool *tool, return 0; } +#define PROC_MAP_PARSE_TIMEOUT (500 * 1000000ULL) + int perf_event__synthesize_mmap_events(struct perf_tool *tool, union perf_event *event, pid_t pid, pid_t tgid, @@ -222,6 +224,8 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool, { char filename[PATH_MAX]; FILE *fp; + unsigned long long t; + bool truncation = false; int rc = 0; if (machine__is_default_guest(machine)) @@ -240,6 +244,7 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool, } event->header.type = PERF_RECORD_MMAP2; + t = rdclock(); while (1) { char bf[BUFSIZ]; @@ -253,6 +258,12 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool, if (fgets(bf, sizeof(bf), fp) == NULL) break; + if ((rdclock() - t) > PROC_MAP_PARSE_TIMEOUT) { + pr_warning("Reading %s time out.\n", filename); + truncation = true; + goto out; + } + /* ensure null termination since stack will be reused. 
*/ strcpy(execname, ""); @@ -301,6 +312,10 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool, event->header.misc |= PERF_RECORD_MISC_MMAP_DATA; } +out: + if (truncation) + event->header.misc |= PERF_RECORD_MISC_PROC_MAP_PARSE_TIMEOUT; + if (!strcmp(execname, "")) strcpy(execname, anonstr); @@ -319,6 +334,9 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool, rc = -1; break; } + + if (truncation) + break; } fclose(fp); diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index 5dc51ada05df..39868f529cab 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -265,6 +265,7 @@ struct events_stats { u32 nr_unknown_id; u32 nr_unprocessable_samples; u32 nr_auxtrace_errors[PERF_AUXTRACE_ERROR_MAX]; + u32 nr_proc_map_timeout; }; struct attr_event { diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index c371336d1eb2..2d882fd1f1b9 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -1064,6 +1064,8 @@ static int machines__deliver_event(struct machines *machines, case PERF_RECORD_MMAP: return tool->mmap(tool, event, sample, machine); case PERF_RECORD_MMAP2: + if (event->header.misc & PERF_RECORD_MISC_PROC_MAP_PARSE_TIMEOUT) + ++evlist->stats.nr_proc_map_timeout; return tool->mmap2(tool, event, sample, machine); case PERF_RECORD_COMM: return tool->comm(tool, event, sample, machine); @@ -1360,6 +1362,15 @@ static void perf_session__warn_about_errors(const struct perf_session *session) ui__warning("%u out of order events recorded.\n", oe->nr_unordered_events); events_stats__auxtrace_error_warn(stats); + + if (stats->nr_proc_map_timeout != 0) { + ui__warning("%d map information files for pre-existing threads were\n" + "not processed, if there are samples for addresses they\n" + "will not be resolved, you may find out which are these\n" + "threads by running with -v and redirecting the output\n" + "to a file.\n", + stats->nr_proc_map_timeout); + } } static int perf_session__flush_thread_stack(struct thread *thread, -- cgit v1.2.3 From b42be38b2778eda2237fc759e55e3b698b05b315 Mon Sep 17 00:00:00 2001 From: David Herrmann Date: Wed, 17 Jun 2015 17:14:33 +0200 Subject: netlink: add API to retrieve all group memberships This patch adds getsockopt(SOL_NETLINK, NETLINK_LIST_MEMBERSHIPS) to retrieve all groups a socket is a member of. Currently, we have to use getsockname() and look at the nl.nl_groups bitmask. However, this mask is limited to 32 groups. Hence, similar to NETLINK_ADD_MEMBERSHIP and NETLINK_DROP_MEMBERSHIP, this adds a separate sockopt to manager higher groups IDs than 32. This new NETLINK_LIST_MEMBERSHIPS option takes a pointer to __u32 and the size of the array. The array is filled with the full membership-set of the socket, and the required array size is returned in optlen. Hence, user-space can retry with a properly sized array in case it was too small. Signed-off-by: David Herrmann Signed-off-by: David S. 
Miller --- include/uapi/linux/netlink.h | 17 +++++++++-------- net/netlink/af_netlink.c | 22 ++++++++++++++++++++++ 2 files changed, 31 insertions(+), 8 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netlink.h b/include/uapi/linux/netlink.h index 3e34b7d702f8..cf6a65cccbdf 100644 --- a/include/uapi/linux/netlink.h +++ b/include/uapi/linux/netlink.h @@ -101,14 +101,15 @@ struct nlmsgerr { struct nlmsghdr msg; }; -#define NETLINK_ADD_MEMBERSHIP 1 -#define NETLINK_DROP_MEMBERSHIP 2 -#define NETLINK_PKTINFO 3 -#define NETLINK_BROADCAST_ERROR 4 -#define NETLINK_NO_ENOBUFS 5 -#define NETLINK_RX_RING 6 -#define NETLINK_TX_RING 7 -#define NETLINK_LISTEN_ALL_NSID 8 +#define NETLINK_ADD_MEMBERSHIP 1 +#define NETLINK_DROP_MEMBERSHIP 2 +#define NETLINK_PKTINFO 3 +#define NETLINK_BROADCAST_ERROR 4 +#define NETLINK_NO_ENOBUFS 5 +#define NETLINK_RX_RING 6 +#define NETLINK_TX_RING 7 +#define NETLINK_LISTEN_ALL_NSID 8 +#define NETLINK_LIST_MEMBERSHIPS 9 struct nl_pktinfo { __u32 group; diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 69d67c300b80..dea925388a5b 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -2290,6 +2290,28 @@ static int netlink_getsockopt(struct socket *sock, int level, int optname, return -EFAULT; err = 0; break; + case NETLINK_LIST_MEMBERSHIPS: { + int pos, idx, shift; + + err = 0; + netlink_table_grab(); + for (pos = 0; pos * 8 < nlk->ngroups; pos += sizeof(u32)) { + if (len - pos < sizeof(u32)) + break; + + idx = pos / sizeof(unsigned long); + shift = (pos % sizeof(unsigned long)) * 8; + if (put_user((u32)(nlk->groups[idx] >> shift), + (u32 __user *)(optval + pos))) { + err = -EFAULT; + break; + } + } + if (put_user(ALIGN(nlk->ngroups / 8, sizeof(u32)), optlen)) + err = -EFAULT; + netlink_table_ungrab(); + break; + } default: err = -ENOPROTOOPT; } -- cgit v1.2.3 From d0497524658e37956737d7dbee73cc42120255dc Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 21 Jun 2015 19:11:44 +0800 Subject: crypto: user - Move cryptouser.h to uapi The header file cryptouser.h only contains information that is exported to user-space. Signed-off-by: Herbert Xu --- include/linux/cryptouser.h | 110 ---------------------------------------- include/uapi/linux/cryptouser.h | 110 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 110 insertions(+), 110 deletions(-) delete mode 100644 include/linux/cryptouser.h create mode 100644 include/uapi/linux/cryptouser.h (limited to 'include/uapi/linux') diff --git a/include/linux/cryptouser.h b/include/linux/cryptouser.h deleted file mode 100644 index 36efbbbf2f83..000000000000 --- a/include/linux/cryptouser.h +++ /dev/null @@ -1,110 +0,0 @@ -/* - * Crypto user configuration API. - * - * Copyright (C) 2011 secunet Security Networks AG - * Copyright (C) 2011 Steffen Klassert - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. - */ - -/* Netlink configuration messages. 
*/ -enum { - CRYPTO_MSG_BASE = 0x10, - CRYPTO_MSG_NEWALG = 0x10, - CRYPTO_MSG_DELALG, - CRYPTO_MSG_UPDATEALG, - CRYPTO_MSG_GETALG, - __CRYPTO_MSG_MAX -}; -#define CRYPTO_MSG_MAX (__CRYPTO_MSG_MAX - 1) -#define CRYPTO_NR_MSGTYPES (CRYPTO_MSG_MAX + 1 - CRYPTO_MSG_BASE) - -#define CRYPTO_MAX_NAME CRYPTO_MAX_ALG_NAME - -/* Netlink message attributes. */ -enum crypto_attr_type_t { - CRYPTOCFGA_UNSPEC, - CRYPTOCFGA_PRIORITY_VAL, /* __u32 */ - CRYPTOCFGA_REPORT_LARVAL, /* struct crypto_report_larval */ - CRYPTOCFGA_REPORT_HASH, /* struct crypto_report_hash */ - CRYPTOCFGA_REPORT_BLKCIPHER, /* struct crypto_report_blkcipher */ - CRYPTOCFGA_REPORT_AEAD, /* struct crypto_report_aead */ - CRYPTOCFGA_REPORT_COMPRESS, /* struct crypto_report_comp */ - CRYPTOCFGA_REPORT_RNG, /* struct crypto_report_rng */ - CRYPTOCFGA_REPORT_CIPHER, /* struct crypto_report_cipher */ - CRYPTOCFGA_REPORT_AKCIPHER, /* struct crypto_report_akcipher */ - __CRYPTOCFGA_MAX - -#define CRYPTOCFGA_MAX (__CRYPTOCFGA_MAX - 1) -}; - -struct crypto_user_alg { - char cru_name[CRYPTO_MAX_ALG_NAME]; - char cru_driver_name[CRYPTO_MAX_ALG_NAME]; - char cru_module_name[CRYPTO_MAX_ALG_NAME]; - __u32 cru_type; - __u32 cru_mask; - __u32 cru_refcnt; - __u32 cru_flags; -}; - -struct crypto_report_larval { - char type[CRYPTO_MAX_NAME]; -}; - -struct crypto_report_hash { - char type[CRYPTO_MAX_NAME]; - unsigned int blocksize; - unsigned int digestsize; -}; - -struct crypto_report_cipher { - char type[CRYPTO_MAX_ALG_NAME]; - unsigned int blocksize; - unsigned int min_keysize; - unsigned int max_keysize; -}; - -struct crypto_report_blkcipher { - char type[CRYPTO_MAX_NAME]; - char geniv[CRYPTO_MAX_NAME]; - unsigned int blocksize; - unsigned int min_keysize; - unsigned int max_keysize; - unsigned int ivsize; -}; - -struct crypto_report_aead { - char type[CRYPTO_MAX_NAME]; - char geniv[CRYPTO_MAX_NAME]; - unsigned int blocksize; - unsigned int maxauthsize; - unsigned int ivsize; -}; - -struct crypto_report_comp { - char type[CRYPTO_MAX_NAME]; -}; - -struct crypto_report_rng { - char type[CRYPTO_MAX_NAME]; - unsigned int seedsize; -}; - -struct crypto_report_akcipher { - char type[CRYPTO_MAX_NAME]; -}; - -#define CRYPTO_REPORT_MAXSIZE (sizeof(struct crypto_user_alg) + \ - sizeof(struct crypto_report_blkcipher)) diff --git a/include/uapi/linux/cryptouser.h b/include/uapi/linux/cryptouser.h new file mode 100644 index 000000000000..36efbbbf2f83 --- /dev/null +++ b/include/uapi/linux/cryptouser.h @@ -0,0 +1,110 @@ +/* + * Crypto user configuration API. + * + * Copyright (C) 2011 secunet Security Networks AG + * Copyright (C) 2011 Steffen Klassert + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + */ + +/* Netlink configuration messages. 
*/ +enum { + CRYPTO_MSG_BASE = 0x10, + CRYPTO_MSG_NEWALG = 0x10, + CRYPTO_MSG_DELALG, + CRYPTO_MSG_UPDATEALG, + CRYPTO_MSG_GETALG, + __CRYPTO_MSG_MAX +}; +#define CRYPTO_MSG_MAX (__CRYPTO_MSG_MAX - 1) +#define CRYPTO_NR_MSGTYPES (CRYPTO_MSG_MAX + 1 - CRYPTO_MSG_BASE) + +#define CRYPTO_MAX_NAME CRYPTO_MAX_ALG_NAME + +/* Netlink message attributes. */ +enum crypto_attr_type_t { + CRYPTOCFGA_UNSPEC, + CRYPTOCFGA_PRIORITY_VAL, /* __u32 */ + CRYPTOCFGA_REPORT_LARVAL, /* struct crypto_report_larval */ + CRYPTOCFGA_REPORT_HASH, /* struct crypto_report_hash */ + CRYPTOCFGA_REPORT_BLKCIPHER, /* struct crypto_report_blkcipher */ + CRYPTOCFGA_REPORT_AEAD, /* struct crypto_report_aead */ + CRYPTOCFGA_REPORT_COMPRESS, /* struct crypto_report_comp */ + CRYPTOCFGA_REPORT_RNG, /* struct crypto_report_rng */ + CRYPTOCFGA_REPORT_CIPHER, /* struct crypto_report_cipher */ + CRYPTOCFGA_REPORT_AKCIPHER, /* struct crypto_report_akcipher */ + __CRYPTOCFGA_MAX + +#define CRYPTOCFGA_MAX (__CRYPTOCFGA_MAX - 1) +}; + +struct crypto_user_alg { + char cru_name[CRYPTO_MAX_ALG_NAME]; + char cru_driver_name[CRYPTO_MAX_ALG_NAME]; + char cru_module_name[CRYPTO_MAX_ALG_NAME]; + __u32 cru_type; + __u32 cru_mask; + __u32 cru_refcnt; + __u32 cru_flags; +}; + +struct crypto_report_larval { + char type[CRYPTO_MAX_NAME]; +}; + +struct crypto_report_hash { + char type[CRYPTO_MAX_NAME]; + unsigned int blocksize; + unsigned int digestsize; +}; + +struct crypto_report_cipher { + char type[CRYPTO_MAX_ALG_NAME]; + unsigned int blocksize; + unsigned int min_keysize; + unsigned int max_keysize; +}; + +struct crypto_report_blkcipher { + char type[CRYPTO_MAX_NAME]; + char geniv[CRYPTO_MAX_NAME]; + unsigned int blocksize; + unsigned int min_keysize; + unsigned int max_keysize; + unsigned int ivsize; +}; + +struct crypto_report_aead { + char type[CRYPTO_MAX_NAME]; + char geniv[CRYPTO_MAX_NAME]; + unsigned int blocksize; + unsigned int maxauthsize; + unsigned int ivsize; +}; + +struct crypto_report_comp { + char type[CRYPTO_MAX_NAME]; +}; + +struct crypto_report_rng { + char type[CRYPTO_MAX_NAME]; + unsigned int seedsize; +}; + +struct crypto_report_akcipher { + char type[CRYPTO_MAX_NAME]; +}; + +#define CRYPTO_REPORT_MAXSIZE (sizeof(struct crypto_user_alg) + \ + sizeof(struct crypto_report_blkcipher)) -- cgit v1.2.3 From 9aa867e46565d61491f884c793e4988678fbffa3 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 21 Jun 2015 19:11:45 +0800 Subject: crypto: user - Add CRYPTO_MSG_DELRNG This patch adds a new crypto_user command that allows the admin to delete the crypto system RNG. Note that this can only be done if the RNG is currently not in use. The next time it is used a new system RNG will be allocated. 
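For illustration only (this sketch is not part of the patch): because crypto_msg_min[] gives CRYPTO_MSG_DELRNG a zero-length payload, a userspace request is just a bare netlink header sent on a NETLINK_CRYPTO socket. Flag selection and ACK handling are simplified here, and the kernel side additionally requires CAP_NET_ADMIN, as the diff below shows.

#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <linux/netlink.h>
#include <linux/cryptouser.h>

/* Sketch: ask the kernel to drop the default system RNG instance. */
static int request_del_default_rng(void)
{
	struct sockaddr_nl kernel = { .nl_family = AF_NETLINK };
	struct nlmsghdr nlh;
	int fd;

	fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_CRYPTO);
	if (fd < 0)
		return -1;

	memset(&nlh, 0, sizeof(nlh));
	nlh.nlmsg_len = NLMSG_LENGTH(0);	/* no payload for DELRNG */
	nlh.nlmsg_type = CRYPTO_MSG_DELRNG;
	nlh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;

	if (sendto(fd, &nlh, nlh.nlmsg_len, 0,
		   (struct sockaddr *)&kernel, sizeof(kernel)) < 0) {
		close(fd);
		return -1;
	}
	close(fd);
	return 0;
}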
Signed-off-by: Herbert Xu --- crypto/crypto_user.c | 12 +++++++++++- include/uapi/linux/cryptouser.h | 1 + 2 files changed, 12 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/crypto/crypto_user.c b/crypto/crypto_user.c index 11dbd5a81c72..08ea2867fc8a 100644 --- a/crypto/crypto_user.c +++ b/crypto/crypto_user.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include "internal.h" @@ -472,13 +473,21 @@ static int crypto_add_alg(struct sk_buff *skb, struct nlmsghdr *nlh, return 0; } +static int crypto_del_rng(struct sk_buff *skb, struct nlmsghdr *nlh, + struct nlattr **attrs) +{ + if (!netlink_capable(skb, CAP_NET_ADMIN)) + return -EPERM; + return crypto_del_default_rng(); +} + #define MSGSIZE(type) sizeof(struct type) static const int crypto_msg_min[CRYPTO_NR_MSGTYPES] = { [CRYPTO_MSG_NEWALG - CRYPTO_MSG_BASE] = MSGSIZE(crypto_user_alg), [CRYPTO_MSG_DELALG - CRYPTO_MSG_BASE] = MSGSIZE(crypto_user_alg), [CRYPTO_MSG_UPDATEALG - CRYPTO_MSG_BASE] = MSGSIZE(crypto_user_alg), - [CRYPTO_MSG_GETALG - CRYPTO_MSG_BASE] = MSGSIZE(crypto_user_alg), + [CRYPTO_MSG_DELRNG - CRYPTO_MSG_BASE] = 0, }; static const struct nla_policy crypto_policy[CRYPTOCFGA_MAX+1] = { @@ -498,6 +507,7 @@ static const struct crypto_link { [CRYPTO_MSG_GETALG - CRYPTO_MSG_BASE] = { .doit = crypto_report, .dump = crypto_dump_report, .done = crypto_dump_report_done}, + [CRYPTO_MSG_DELRNG - CRYPTO_MSG_BASE] = { .doit = crypto_del_rng }, }; static int crypto_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) diff --git a/include/uapi/linux/cryptouser.h b/include/uapi/linux/cryptouser.h index 36efbbbf2f83..2e67bb64c1da 100644 --- a/include/uapi/linux/cryptouser.h +++ b/include/uapi/linux/cryptouser.h @@ -25,6 +25,7 @@ enum { CRYPTO_MSG_DELALG, CRYPTO_MSG_UPDATEALG, CRYPTO_MSG_GETALG, + CRYPTO_MSG_DELRNG, __CRYPTO_MSG_MAX }; #define CRYPTO_MSG_MAX (__CRYPTO_MSG_MAX - 1) -- cgit v1.2.3 From 5023a5ca8e144846ec0646554336000abb11e04f Mon Sep 17 00:00:00 2001 From: Sebastian Reichel Date: Thu, 16 Apr 2015 19:05:18 +0200 Subject: HSI: cmt_speech: fix timestamp interface The user interface for timestamps in the new cmt_speech driver is broken in multiple ways: - The layout is incompatible between 32-bit and 64-bit user space, because of the size differences in 'struct timespec'. This means that the driver can not work when used with 32-bit user space on a 64-bit kernel. - As there are plans to change 32-bit user space to use a 64-bit time_t type in the future, it will also be incompatible with new 32-bit user space. - It is using ktime_get_ts under it's deprecated alias (do_posix_clock_monotonic_gettime). To keep support for the user space tools written for this driver (which have lived many years out-of-tree), the interface has been hardened to unsigned 32-bit values. 
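As a hedged illustration (not taken from the patch or the out-of-tree tools): with the fixed-width cs_timestamp added below, a reader of the mmap'd cs_mmap_config_block sees the same 8-byte field whether userspace is 32-bit or 64-bit, so no time_t/timespec layout negotiation is needed.

#include <stdint.h>
#include <stdio.h>
#include <linux/hsi/cs-protocol.h>

/* Sketch: consume the monotonic RX-control timestamp from the mmap area. */
static void print_rx_ctrl_tstamp(const struct cs_mmap_config_block *cfg)
{
	/* tv_sec/tv_nsec are plain __u32, identical on all ABIs */
	uint64_t ns = (uint64_t)cfg->tstamp_rx_ctrl.tv_sec * 1000000000ULL +
		      cfg->tstamp_rx_ctrl.tv_nsec;

	printf("last control command at %llu ns (monotonic)\n",
	       (unsigned long long)ns);
}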
Reported-by: Arnd Bergmann Signed-off-by: Sebastian Reichel --- drivers/hsi/clients/cmt_speech.c | 9 +++++++-- include/uapi/linux/hsi/cs-protocol.h | 16 +++++++++++----- 2 files changed, 18 insertions(+), 7 deletions(-) (limited to 'include/uapi/linux') diff --git a/drivers/hsi/clients/cmt_speech.c b/drivers/hsi/clients/cmt_speech.c index 4983529a9c6c..d04643f9548b 100644 --- a/drivers/hsi/clients/cmt_speech.c +++ b/drivers/hsi/clients/cmt_speech.c @@ -451,9 +451,14 @@ static void cs_hsi_read_on_control_complete(struct hsi_msg *msg) dev_dbg(&hi->cl->device, "Read on control: %08X\n", cmd); cs_release_cmd(msg); if (hi->flags & CS_FEAT_TSTAMP_RX_CTRL) { - struct timespec *tstamp = + struct timespec tspec; + struct cs_timestamp *tstamp = &hi->mmap_cfg->tstamp_rx_ctrl; - do_posix_clock_monotonic_gettime(tstamp); + + ktime_get_ts(&tspec); + + tstamp->tv_sec = (__u32) tspec.tv_sec; + tstamp->tv_nsec = (__u32) tspec.tv_nsec; } spin_unlock(&hi->lock); diff --git a/include/uapi/linux/hsi/cs-protocol.h b/include/uapi/linux/hsi/cs-protocol.h index 4957bba57cbe..f153d6ea7c62 100644 --- a/include/uapi/linux/hsi/cs-protocol.h +++ b/include/uapi/linux/hsi/cs-protocol.h @@ -75,6 +75,15 @@ struct cs_buffer_config { __u32 reserved[4]; }; +/* + * struct for monotonic timestamp taken when the + * last control command was received + */ +struct cs_timestamp { + __u32 tv_sec; /* seconds */ + __u32 tv_nsec; /* nanoseconds */ +}; + /* * Struct describing the layout and contents of the driver mmap area. * This information is meant as read-only information for the application. @@ -91,11 +100,8 @@ struct cs_mmap_config_block { __u32 rx_ptr; __u32 rx_ptr_boundary; __u32 reserved3[2]; - /* - * if enabled with CS_FEAT_TSTAMP_RX_CTRL, monotonic - * timestamp taken when the last control command was received - */ - struct timespec tstamp_rx_ctrl; + /* enabled with CS_FEAT_TSTAMP_RX_CTRL */ + struct cs_timestamp tstamp_rx_ctrl; }; #define CS_IO_MAGIC 'C' -- cgit v1.2.3 From 2600896d659b638f5d4981dac4cd9aa03c8213da Mon Sep 17 00:00:00 2001 From: Yoshinori Sato Date: Wed, 28 Jan 2015 02:54:39 +0900 Subject: Add ELF machine Signed-off-by: Yoshinori Sato --- include/uapi/linux/elf-em.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/elf-em.h b/include/uapi/linux/elf-em.h index ae99f7743cf4..b08829667ed7 100644 --- a/include/uapi/linux/elf-em.h +++ b/include/uapi/linux/elf-em.h @@ -25,6 +25,7 @@ #define EM_ARM 40 /* ARM 32 bit */ #define EM_SH 42 /* SuperH */ #define EM_SPARCV9 43 /* SPARC v9 64-bit */ +#define EM_H8_300 46 /* Renesas H8/300 */ #define EM_IA_64 50 /* HP/Intel IA-64 */ #define EM_X86_64 62 /* AMD x86-64 */ #define EM_S390 22 /* IBM S/390 */ -- cgit v1.2.3 From 42bcce87d763b4d22dc6d3a0c0b60c6b49820de8 Mon Sep 17 00:00:00 2001 From: Anish Bhatt Date: Mon, 22 Jun 2015 17:44:35 -0700 Subject: dcb : Fix incorrect documentation for struct dcb_app While IEEE and CEE use the same structure to store apps, the selector and priority fields for both are different. Only the priority field is explained, add documentation explaining how the selector field differs for both. cgdcbxd code shows an example of how selector fields differ. Signed-off-by: Anish Bhatt Signed-off-by: David S. 
Miller --- include/uapi/linux/dcbnl.h | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/dcbnl.h b/include/uapi/linux/dcbnl.h index 6497d7933d5b..3ea470f35e40 100644 --- a/include/uapi/linux/dcbnl.h +++ b/include/uapi/linux/dcbnl.h @@ -207,8 +207,7 @@ struct cee_pfc { #define IEEE_8021QAZ_APP_SEL_ANY 4 /* This structure contains the IEEE 802.1Qaz APP managed object. This - * object is also used for the CEE std as well. There is no difference - * between the objects. + * object is also used for the CEE std as well. * * @selector: protocol identifier type * @protocol: protocol of type indicated @@ -216,13 +215,18 @@ struct cee_pfc { * 8-bit 802.1p user priority bitmap for CEE * * ---- - * Selector field values + * Selector field values for IEEE 802.1Qaz * 0 Reserved * 1 Ethertype * 2 Well known port number over TCP or SCTP * 3 Well known port number over UDP or DCCP * 4 Well known port number over TCP, SCTP, UDP, or DCCP * 5-7 Reserved + * + * Selector field values for CEE + * 0 Ethertype + * 1 Well known port number over TCP or UDP + * 2-3 Reserved */ struct dcb_app { __u8 selector; -- cgit v1.2.3 From 8a3d03166f19329b46c6f9e900f93a89f446077b Mon Sep 17 00:00:00 2001 From: Andy Gospodarek Date: Tue, 23 Jun 2015 13:45:36 -0400 Subject: net: track link-status of ipv4 nexthops Add a fib flag called RTNH_F_LINKDOWN to any ipv4 nexthops that are reachable via an interface where carrier is off. No action is taken, but additional flags are passed to userspace to indicate carrier status. This also includes a cleanup to fib_disable_ip to more clearly indicate what event made the function call to replace the more cryptic force option previously used. v2: Split out kernel functionality into 2 patches, this patch simply sets and clears new nexthop flag RTNH_F_LINKDOWN. v3: Cleanups suggested by Alex as well as a bug noticed in fib_sync_down_dev and fib_sync_up when multipath was not enabled. v5: Whitespace and variable declaration fixups suggested by Dave. v6: Style fixups noticed by Dave; ran checkpatch to be sure I got them all. Signed-off-by: Andy Gospodarek Signed-off-by: Dinesh Dutt Acked-by: Scott Feldman Signed-off-by: David S. 
Miller --- include/net/ip_fib.h | 4 +-- include/uapi/linux/rtnetlink.h | 3 +++ net/ipv4/fib_frontend.c | 23 ++++++++++------ net/ipv4/fib_semantics.c | 60 +++++++++++++++++++++++++++++++++--------- 4 files changed, 67 insertions(+), 23 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 54271ed0ed45..f73d27c5575a 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -305,9 +305,9 @@ void fib_flush_external(struct net *net); /* Exported by fib_semantics.c */ int ip_fib_check_default(__be32 gw, struct net_device *dev); -int fib_sync_down_dev(struct net_device *dev, int force); +int fib_sync_down_dev(struct net_device *dev, unsigned long event); int fib_sync_down_addr(struct net *net, __be32 local); -int fib_sync_up(struct net_device *dev); +int fib_sync_up(struct net_device *dev, unsigned int nh_flags); void fib_select_multipath(struct fib_result *res); /* Exported by fib_trie.c */ diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index 17fb02f488da..fdd8f07f1d34 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -338,6 +338,9 @@ struct rtnexthop { #define RTNH_F_PERVASIVE 2 /* Do recursive gateway lookup */ #define RTNH_F_ONLINK 4 /* Gateway is forced on link */ #define RTNH_F_OFFLOAD 8 /* offloaded route */ +#define RTNH_F_LINKDOWN 16 /* carrier-down on nexthop */ + +#define RTNH_COMPARE_MASK (RTNH_F_DEAD | RTNH_F_LINKDOWN) /* Macros to handle hexthops */ diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 872494e6e6eb..534eb1485045 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -1063,9 +1063,9 @@ static void nl_fib_lookup_exit(struct net *net) net->ipv4.fibnl = NULL; } -static void fib_disable_ip(struct net_device *dev, int force) +static void fib_disable_ip(struct net_device *dev, unsigned long event) { - if (fib_sync_down_dev(dev, force)) + if (fib_sync_down_dev(dev, event)) fib_flush(dev_net(dev)); rt_cache_flush(dev_net(dev)); arp_ifdown(dev); @@ -1081,7 +1081,7 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, case NETDEV_UP: fib_add_ifaddr(ifa); #ifdef CONFIG_IP_ROUTE_MULTIPATH - fib_sync_up(dev); + fib_sync_up(dev, RTNH_F_DEAD); #endif atomic_inc(&net->ipv4.dev_addr_genid); rt_cache_flush(dev_net(dev)); @@ -1093,7 +1093,7 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, /* Last address was deleted from this interface. * Disable IP. 
*/ - fib_disable_ip(dev, 1); + fib_disable_ip(dev, event); } else { rt_cache_flush(dev_net(dev)); } @@ -1107,9 +1107,10 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct in_device *in_dev; struct net *net = dev_net(dev); + unsigned int flags; if (event == NETDEV_UNREGISTER) { - fib_disable_ip(dev, 2); + fib_disable_ip(dev, event); rt_flush_dev(dev); return NOTIFY_DONE; } @@ -1124,16 +1125,22 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo fib_add_ifaddr(ifa); } endfor_ifa(in_dev); #ifdef CONFIG_IP_ROUTE_MULTIPATH - fib_sync_up(dev); + fib_sync_up(dev, RTNH_F_DEAD); #endif atomic_inc(&net->ipv4.dev_addr_genid); rt_cache_flush(net); break; case NETDEV_DOWN: - fib_disable_ip(dev, 0); + fib_disable_ip(dev, event); break; - case NETDEV_CHANGEMTU: case NETDEV_CHANGE: + flags = dev_get_flags(dev); + if (flags & (IFF_RUNNING | IFF_LOWER_UP)) + fib_sync_up(dev, RTNH_F_LINKDOWN); + else + fib_sync_down_dev(dev, event); + /* fall through */ + case NETDEV_CHANGEMTU: rt_cache_flush(net); break; } diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 28ec3c1823bf..b1b305b1e340 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -266,7 +266,7 @@ static inline int nh_comp(const struct fib_info *fi, const struct fib_info *ofi) #ifdef CONFIG_IP_ROUTE_CLASSID nh->nh_tclassid != onh->nh_tclassid || #endif - ((nh->nh_flags ^ onh->nh_flags) & ~RTNH_F_DEAD)) + ((nh->nh_flags ^ onh->nh_flags) & ~RTNH_COMPARE_MASK)) return -1; onh++; } endfor_nexthops(fi); @@ -318,7 +318,7 @@ static struct fib_info *fib_find_info(const struct fib_info *nfi) nfi->fib_type == fi->fib_type && memcmp(nfi->fib_metrics, fi->fib_metrics, sizeof(u32) * RTAX_MAX) == 0 && - ((nfi->fib_flags ^ fi->fib_flags) & ~RTNH_F_DEAD) == 0 && + !((nfi->fib_flags ^ fi->fib_flags) & ~RTNH_COMPARE_MASK) && (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0)) return fi; } @@ -604,6 +604,8 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi, return -ENODEV; if (!(dev->flags & IFF_UP)) return -ENETDOWN; + if (!netif_carrier_ok(dev)) + nh->nh_flags |= RTNH_F_LINKDOWN; nh->nh_dev = dev; dev_hold(dev); nh->nh_scope = RT_SCOPE_LINK; @@ -636,6 +638,8 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi, if (!dev) goto out; dev_hold(dev); + if (!netif_carrier_ok(dev)) + nh->nh_flags |= RTNH_F_LINKDOWN; err = (dev->flags & IFF_UP) ? 
0 : -ENETDOWN; } else { struct in_device *in_dev; @@ -654,6 +658,8 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi, nh->nh_dev = in_dev->dev; dev_hold(nh->nh_dev); nh->nh_scope = RT_SCOPE_HOST; + if (!netif_carrier_ok(nh->nh_dev)) + nh->nh_flags |= RTNH_F_LINKDOWN; err = 0; } out: @@ -920,11 +926,17 @@ struct fib_info *fib_create_info(struct fib_config *cfg) if (!nh->nh_dev) goto failure; } else { + int linkdown = 0; + change_nexthops(fi) { err = fib_check_nh(cfg, fi, nexthop_nh); if (err != 0) goto failure; + if (nexthop_nh->nh_flags & RTNH_F_LINKDOWN) + linkdown++; } endfor_nexthops(fi) + if (linkdown == fi->fib_nhs) + fi->fib_flags |= RTNH_F_LINKDOWN; } if (fi->fib_prefsrc) { @@ -1103,7 +1115,7 @@ int fib_sync_down_addr(struct net *net, __be32 local) return ret; } -int fib_sync_down_dev(struct net_device *dev, int force) +int fib_sync_down_dev(struct net_device *dev, unsigned long event) { int ret = 0; int scope = RT_SCOPE_NOWHERE; @@ -1112,7 +1124,8 @@ int fib_sync_down_dev(struct net_device *dev, int force) struct hlist_head *head = &fib_info_devhash[hash]; struct fib_nh *nh; - if (force) + if (event == NETDEV_UNREGISTER || + event == NETDEV_DOWN) scope = -1; hlist_for_each_entry(nh, head, nh_hash) { @@ -1129,7 +1142,15 @@ int fib_sync_down_dev(struct net_device *dev, int force) dead++; else if (nexthop_nh->nh_dev == dev && nexthop_nh->nh_scope != scope) { - nexthop_nh->nh_flags |= RTNH_F_DEAD; + switch (event) { + case NETDEV_DOWN: + case NETDEV_UNREGISTER: + nexthop_nh->nh_flags |= RTNH_F_DEAD; + /* fall through */ + case NETDEV_CHANGE: + nexthop_nh->nh_flags |= RTNH_F_LINKDOWN; + break; + } #ifdef CONFIG_IP_ROUTE_MULTIPATH spin_lock_bh(&fib_multipath_lock); fi->fib_power -= nexthop_nh->nh_power; @@ -1139,14 +1160,23 @@ int fib_sync_down_dev(struct net_device *dev, int force) dead++; } #ifdef CONFIG_IP_ROUTE_MULTIPATH - if (force > 1 && nexthop_nh->nh_dev == dev) { + if (event == NETDEV_UNREGISTER && + nexthop_nh->nh_dev == dev) { dead = fi->fib_nhs; break; } #endif } endfor_nexthops(fi) if (dead == fi->fib_nhs) { - fi->fib_flags |= RTNH_F_DEAD; + switch (event) { + case NETDEV_DOWN: + case NETDEV_UNREGISTER: + fi->fib_flags |= RTNH_F_DEAD; + /* fall through */ + case NETDEV_CHANGE: + fi->fib_flags |= RTNH_F_LINKDOWN; + break; + } ret++; } } @@ -1210,13 +1240,11 @@ out: return; } -#ifdef CONFIG_IP_ROUTE_MULTIPATH - /* * Dead device goes up. We wake up dead nexthops. * It takes sense only on multipath routes. */ -int fib_sync_up(struct net_device *dev) +int fib_sync_up(struct net_device *dev, unsigned int nh_flags) { struct fib_info *prev_fi; unsigned int hash; @@ -1243,7 +1271,7 @@ int fib_sync_up(struct net_device *dev) prev_fi = fi; alive = 0; change_nexthops(fi) { - if (!(nexthop_nh->nh_flags & RTNH_F_DEAD)) { + if (!(nexthop_nh->nh_flags & nh_flags)) { alive++; continue; } @@ -1254,14 +1282,18 @@ int fib_sync_up(struct net_device *dev) !__in_dev_get_rtnl(dev)) continue; alive++; +#ifdef CONFIG_IP_ROUTE_MULTIPATH spin_lock_bh(&fib_multipath_lock); nexthop_nh->nh_power = 0; - nexthop_nh->nh_flags &= ~RTNH_F_DEAD; + nexthop_nh->nh_flags &= ~nh_flags; spin_unlock_bh(&fib_multipath_lock); +#else + nexthop_nh->nh_flags &= ~nh_flags; +#endif } endfor_nexthops(fi) if (alive > 0) { - fi->fib_flags &= ~RTNH_F_DEAD; + fi->fib_flags &= ~nh_flags; ret++; } } @@ -1269,6 +1301,8 @@ int fib_sync_up(struct net_device *dev) return ret; } +#ifdef CONFIG_IP_ROUTE_MULTIPATH + /* * The algorithm is suboptimal, but it provides really * fair weighted route distribution. 
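Purely as an aside, outside the patch hunks above: userspace that dumps a multipath route can test the new flag on each struct rtnexthop in RTA_MULTIPATH, since fib_dump_info() copies nh_flags (which now may include RTNH_F_LINKDOWN) into rtnh_flags. Fetching the route dump itself is omitted in this sketch.

#include <linux/rtnetlink.h>

/* Sketch: does any nexthop in this RTA_MULTIPATH attribute report carrier-down? */
static int any_nexthop_linkdown(struct rtattr *mp_attr)
{
	struct rtnexthop *rtnh = RTA_DATA(mp_attr);
	int len = RTA_PAYLOAD(mp_attr);

	while (RTNH_OK(rtnh, len)) {
		if (rtnh->rtnh_flags & RTNH_F_LINKDOWN)
			return 1;
		len -= RTNH_ALIGN(rtnh->rtnh_len);
		rtnh = RTNH_NEXT(rtnh);
	}
	return 0;
}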
-- cgit v1.2.3 From 0eeb075fad736fb92620af995c47c204bbb5e829 Mon Sep 17 00:00:00 2001 From: Andy Gospodarek Date: Tue, 23 Jun 2015 13:45:37 -0400 Subject: net: ipv4 sysctl option to ignore routes when nexthop link is down This feature is only enabled with the new per-interface or ipv4 global sysctls called 'ignore_routes_with_linkdown'. net.ipv4.conf.all.ignore_routes_with_linkdown = 0 net.ipv4.conf.default.ignore_routes_with_linkdown = 0 net.ipv4.conf.lo.ignore_routes_with_linkdown = 0 ... When the above sysctls are set, will report to userspace that a route is dead and will no longer resolve to this nexthop when performing a fib lookup. This will signal to userspace that the route will not be selected. The signalling of a RTNH_F_DEAD is only passed to userspace if the sysctl is enabled and link is down. This was done as without it the netlink listeners would have no idea whether or not a nexthop would be selected. The kernel only sets RTNH_F_DEAD internally if the interface has IFF_UP cleared. With the new sysctl set, the following behavior can be observed (interface p8p1 is link-down): default via 10.0.5.2 dev p9p1 10.0.5.0/24 dev p9p1 proto kernel scope link src 10.0.5.15 70.0.0.0/24 dev p7p1 proto kernel scope link src 70.0.0.1 80.0.0.0/24 dev p8p1 proto kernel scope link src 80.0.0.1 dead linkdown 90.0.0.0/24 via 80.0.0.2 dev p8p1 metric 1 dead linkdown 90.0.0.0/24 via 70.0.0.2 dev p7p1 metric 2 90.0.0.1 via 70.0.0.2 dev p7p1 src 70.0.0.1 cache local 80.0.0.1 dev lo src 80.0.0.1 cache 80.0.0.2 via 10.0.5.2 dev p9p1 src 10.0.5.15 cache While the route does remain in the table (so it can be modified if needed rather than being wiped away as it would be if IFF_UP was cleared), the proper next-hop is chosen automatically when the link is down. Now interface p8p1 is linked-up: default via 10.0.5.2 dev p9p1 10.0.5.0/24 dev p9p1 proto kernel scope link src 10.0.5.15 70.0.0.0/24 dev p7p1 proto kernel scope link src 70.0.0.1 80.0.0.0/24 dev p8p1 proto kernel scope link src 80.0.0.1 90.0.0.0/24 via 80.0.0.2 dev p8p1 metric 1 90.0.0.0/24 via 70.0.0.2 dev p7p1 metric 2 192.168.56.0/24 dev p2p1 proto kernel scope link src 192.168.56.2 90.0.0.1 via 80.0.0.2 dev p8p1 src 80.0.0.1 cache local 80.0.0.1 dev lo src 80.0.0.1 cache 80.0.0.2 dev p8p1 src 80.0.0.1 cache and the output changes to what one would expect. If the sysctl is not set, the following output would be expected when p8p1 is down: default via 10.0.5.2 dev p9p1 10.0.5.0/24 dev p9p1 proto kernel scope link src 10.0.5.15 70.0.0.0/24 dev p7p1 proto kernel scope link src 70.0.0.1 80.0.0.0/24 dev p8p1 proto kernel scope link src 80.0.0.1 linkdown 90.0.0.0/24 via 80.0.0.2 dev p8p1 metric 1 linkdown 90.0.0.0/24 via 70.0.0.2 dev p7p1 metric 2 Since the dead flag does not appear, there should be no expectation that the kernel would skip using this route due to link being down. v2: Split kernel changes into 2 patches, this actually makes a behavioral change if the sysctl is set. Also took suggestion from Alex to simplify code by only checking sysctl during fib lookup and suggestion from Scott to add a per-interface sysctl. v3: Code clean-ups to make it more readable and efficient as well as a reverse path check fix. v4: Drop binary sysctl v5: Whitespace fixups from Dave v6: Style changes from Dave and checkpatch suggestions v7: One more checkpatch fixup Signed-off-by: Andy Gospodarek Signed-off-by: Dinesh Dutt Acked-by: Scott Feldman Signed-off-by: David S. 
Miller --- include/linux/inetdevice.h | 3 +++ include/net/fib_rules.h | 3 ++- include/net/ip_fib.h | 16 +++++++++------- include/uapi/linux/ip.h | 1 + net/ipv4/devinet.c | 2 ++ net/ipv4/fib_frontend.c | 6 +++--- net/ipv4/fib_rules.c | 5 +++-- net/ipv4/fib_semantics.c | 33 ++++++++++++++++++++++++++++----- net/ipv4/fib_trie.c | 6 ++++++ net/ipv4/netfilter/ipt_rpfilter.c | 2 +- net/ipv4/route.c | 10 +++++----- 11 files changed, 63 insertions(+), 24 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h index 0a21fbefdfbe..a4328cea376a 100644 --- a/include/linux/inetdevice.h +++ b/include/linux/inetdevice.h @@ -120,6 +120,9 @@ static inline void ipv4_devconf_setall(struct in_device *in_dev) || (!IN_DEV_FORWARD(in_dev) && \ IN_DEV_ORCONF((in_dev), ACCEPT_REDIRECTS))) +#define IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) \ + IN_DEV_CONF_GET((in_dev), IGNORE_ROUTES_WITH_LINKDOWN) + #define IN_DEV_ARPFILTER(in_dev) IN_DEV_ORCONF((in_dev), ARPFILTER) #define IN_DEV_ARP_ACCEPT(in_dev) IN_DEV_ORCONF((in_dev), ARP_ACCEPT) #define IN_DEV_ARP_ANNOUNCE(in_dev) IN_DEV_MAXCONF((in_dev), ARP_ANNOUNCE) diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h index 6d67383a5114..903a55efbffe 100644 --- a/include/net/fib_rules.h +++ b/include/net/fib_rules.h @@ -36,7 +36,8 @@ struct fib_lookup_arg { void *result; struct fib_rule *rule; int flags; -#define FIB_LOOKUP_NOREF 1 +#define FIB_LOOKUP_NOREF 1 +#define FIB_LOOKUP_IGNORE_LINKSTATE 2 }; struct fib_rules_ops { diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index f73d27c5575a..49c142bdf01e 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -226,7 +226,7 @@ static inline struct fib_table *fib_new_table(struct net *net, u32 id) } static inline int fib_lookup(struct net *net, const struct flowi4 *flp, - struct fib_result *res) + struct fib_result *res, unsigned int flags) { struct fib_table *tb; int err = -ENETUNREACH; @@ -234,7 +234,7 @@ static inline int fib_lookup(struct net *net, const struct flowi4 *flp, rcu_read_lock(); tb = fib_get_table(net, RT_TABLE_MAIN); - if (tb && !fib_table_lookup(tb, flp, res, FIB_LOOKUP_NOREF)) + if (tb && !fib_table_lookup(tb, flp, res, flags | FIB_LOOKUP_NOREF)) err = 0; rcu_read_unlock(); @@ -249,16 +249,18 @@ void __net_exit fib4_rules_exit(struct net *net); struct fib_table *fib_new_table(struct net *net, u32 id); struct fib_table *fib_get_table(struct net *net, u32 id); -int __fib_lookup(struct net *net, struct flowi4 *flp, struct fib_result *res); +int __fib_lookup(struct net *net, struct flowi4 *flp, + struct fib_result *res, unsigned int flags); static inline int fib_lookup(struct net *net, struct flowi4 *flp, - struct fib_result *res) + struct fib_result *res, unsigned int flags) { struct fib_table *tb; int err; + flags |= FIB_LOOKUP_NOREF; if (net->ipv4.fib_has_custom_rules) - return __fib_lookup(net, flp, res); + return __fib_lookup(net, flp, res, flags); rcu_read_lock(); @@ -266,11 +268,11 @@ static inline int fib_lookup(struct net *net, struct flowi4 *flp, for (err = 0; !err; err = -ENETUNREACH) { tb = rcu_dereference_rtnl(net->ipv4.fib_main); - if (tb && !fib_table_lookup(tb, flp, res, FIB_LOOKUP_NOREF)) + if (tb && !fib_table_lookup(tb, flp, res, flags)) break; tb = rcu_dereference_rtnl(net->ipv4.fib_default); - if (tb && !fib_table_lookup(tb, flp, res, FIB_LOOKUP_NOREF)) + if (tb && !fib_table_lookup(tb, flp, res, flags)) break; } diff --git a/include/uapi/linux/ip.h b/include/uapi/linux/ip.h index 
411959405ab6..08f894d2ddbd 100644 --- a/include/uapi/linux/ip.h +++ b/include/uapi/linux/ip.h @@ -164,6 +164,7 @@ enum IPV4_DEVCONF_ROUTE_LOCALNET, IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL, IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL, + IPV4_DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN, __IPV4_DEVCONF_MAX }; diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 419d23c53ec7..7498716e8f54 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -2169,6 +2169,8 @@ static struct devinet_sysctl_table { "igmpv2_unsolicited_report_interval"), DEVINET_SYSCTL_RW_ENTRY(IGMPV3_UNSOLICITED_REPORT_INTERVAL, "igmpv3_unsolicited_report_interval"), + DEVINET_SYSCTL_RW_ENTRY(IGNORE_ROUTES_WITH_LINKDOWN, + "ignore_routes_with_linkdown"), DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"), DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"), diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 534eb1485045..6bbc54940eb4 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -280,7 +280,7 @@ __be32 fib_compute_spec_dst(struct sk_buff *skb) fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos); fl4.flowi4_scope = scope; fl4.flowi4_mark = IN_DEV_SRC_VMARK(in_dev) ? skb->mark : 0; - if (!fib_lookup(net, &fl4, &res)) + if (!fib_lookup(net, &fl4, &res, 0)) return FIB_RES_PREFSRC(net, res); } else { scope = RT_SCOPE_LINK; @@ -319,7 +319,7 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, fl4.flowi4_mark = IN_DEV_SRC_VMARK(idev) ? skb->mark : 0; net = dev_net(dev); - if (fib_lookup(net, &fl4, &res)) + if (fib_lookup(net, &fl4, &res, 0)) goto last_resort; if (res.type != RTN_UNICAST && (res.type != RTN_LOCAL || !IN_DEV_ACCEPT_LOCAL(idev))) @@ -354,7 +354,7 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, fl4.flowi4_oif = dev->ifindex; ret = 0; - if (fib_lookup(net, &fl4, &res) == 0) { + if (fib_lookup(net, &fl4, &res, FIB_LOOKUP_IGNORE_LINKSTATE) == 0) { if (res.type == RTN_UNICAST) ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST; } diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index 56151982f74e..18123d50f576 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -47,11 +47,12 @@ struct fib4_rule { #endif }; -int __fib_lookup(struct net *net, struct flowi4 *flp, struct fib_result *res) +int __fib_lookup(struct net *net, struct flowi4 *flp, + struct fib_result *res, unsigned int flags) { struct fib_lookup_arg arg = { .result = res, - .flags = FIB_LOOKUP_NOREF, + .flags = flags, }; int err; diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index b1b305b1e340..3bfccd83551c 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -623,7 +623,8 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi, /* It is not necessary, but requires a bit of thinking */ if (fl4.flowi4_scope < RT_SCOPE_LINK) fl4.flowi4_scope = RT_SCOPE_LINK; - err = fib_lookup(net, &fl4, &res); + err = fib_lookup(net, &fl4, &res, + FIB_LOOKUP_IGNORE_LINKSTATE); if (err) { rcu_read_unlock(); return err; @@ -1035,12 +1036,20 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event, nla_put_in_addr(skb, RTA_PREFSRC, fi->fib_prefsrc)) goto nla_put_failure; if (fi->fib_nhs == 1) { + struct in_device *in_dev; + if (fi->fib_nh->nh_gw && nla_put_in_addr(skb, RTA_GATEWAY, fi->fib_nh->nh_gw)) goto nla_put_failure; if (fi->fib_nh->nh_oif && nla_put_u32(skb, RTA_OIF, fi->fib_nh->nh_oif)) goto nla_put_failure; + if (fi->fib_nh->nh_flags & RTNH_F_LINKDOWN) { + in_dev = 
__in_dev_get_rcu(fi->fib_nh->nh_dev); + if (in_dev && + IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev)) + rtm->rtm_flags |= RTNH_F_DEAD; + } #ifdef CONFIG_IP_ROUTE_CLASSID if (fi->fib_nh[0].nh_tclassid && nla_put_u32(skb, RTA_FLOW, fi->fib_nh[0].nh_tclassid)) @@ -1057,11 +1066,19 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event, goto nla_put_failure; for_nexthops(fi) { + struct in_device *in_dev; + rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh)); if (!rtnh) goto nla_put_failure; rtnh->rtnh_flags = nh->nh_flags & 0xFF; + if (nh->nh_flags & RTNH_F_LINKDOWN) { + in_dev = __in_dev_get_rcu(nh->nh_dev); + if (in_dev && + IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev)) + rtnh->rtnh_flags |= RTNH_F_DEAD; + } rtnh->rtnh_hops = nh->nh_weight - 1; rtnh->rtnh_ifindex = nh->nh_oif; @@ -1310,16 +1327,22 @@ int fib_sync_up(struct net_device *dev, unsigned int nh_flags) void fib_select_multipath(struct fib_result *res) { struct fib_info *fi = res->fi; + struct in_device *in_dev; int w; spin_lock_bh(&fib_multipath_lock); if (fi->fib_power <= 0) { int power = 0; change_nexthops(fi) { - if (!(nexthop_nh->nh_flags & RTNH_F_DEAD)) { - power += nexthop_nh->nh_weight; - nexthop_nh->nh_power = nexthop_nh->nh_weight; - } + in_dev = __in_dev_get_rcu(nexthop_nh->nh_dev); + if (nexthop_nh->nh_flags & RTNH_F_DEAD) + continue; + if (in_dev && + IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) && + nexthop_nh->nh_flags & RTNH_F_LINKDOWN) + continue; + power += nexthop_nh->nh_weight; + nexthop_nh->nh_power = nexthop_nh->nh_weight; } endfor_nexthops(fi); fi->fib_power = power; if (power <= 0) { diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 6c666a9f1bd5..15d32612e3c6 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1412,9 +1412,15 @@ found: continue; for (nhsel = 0; nhsel < fi->fib_nhs; nhsel++) { const struct fib_nh *nh = &fi->fib_nh[nhsel]; + struct in_device *in_dev = __in_dev_get_rcu(nh->nh_dev); if (nh->nh_flags & RTNH_F_DEAD) continue; + if (in_dev && + IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) && + nh->nh_flags & RTNH_F_LINKDOWN && + !(fib_flags & FIB_LOOKUP_IGNORE_LINKSTATE)) + continue; if (flp->flowi4_oif && flp->flowi4_oif != nh->nh_oif) continue; diff --git a/net/ipv4/netfilter/ipt_rpfilter.c b/net/ipv4/netfilter/ipt_rpfilter.c index 4bfaedf9b34e..8618fd150c96 100644 --- a/net/ipv4/netfilter/ipt_rpfilter.c +++ b/net/ipv4/netfilter/ipt_rpfilter.c @@ -40,7 +40,7 @@ static bool rpfilter_lookup_reverse(struct flowi4 *fl4, struct net *net = dev_net(dev); int ret __maybe_unused; - if (fib_lookup(net, fl4, &res)) + if (fib_lookup(net, fl4, &res, FIB_LOOKUP_IGNORE_LINKSTATE)) return false; if (res.type != RTN_UNICAST) { diff --git a/net/ipv4/route.c b/net/ipv4/route.c index f6055984c307..d0362a2de3d3 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -747,7 +747,7 @@ static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flow if (!(n->nud_state & NUD_VALID)) { neigh_event_send(n, NULL); } else { - if (fib_lookup(net, fl4, &res) == 0) { + if (fib_lookup(net, fl4, &res, 0) == 0) { struct fib_nh *nh = &FIB_RES_NH(res); update_or_create_fnhe(nh, fl4->daddr, new_gw, @@ -975,7 +975,7 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu) return; rcu_read_lock(); - if (fib_lookup(dev_net(dst->dev), fl4, &res) == 0) { + if (fib_lookup(dev_net(dst->dev), fl4, &res, 0) == 0) { struct fib_nh *nh = &FIB_RES_NH(res); update_or_create_fnhe(nh, fl4->daddr, 0, mtu, @@ -1186,7 +1186,7 @@ void ip_rt_get_source(u8 *addr, struct sk_buff *skb, struct 
rtable *rt) fl4.flowi4_mark = skb->mark; rcu_read_lock(); - if (fib_lookup(dev_net(rt->dst.dev), &fl4, &res) == 0) + if (fib_lookup(dev_net(rt->dst.dev), &fl4, &res, 0) == 0) src = FIB_RES_PREFSRC(dev_net(rt->dst.dev), res); else src = inet_select_addr(rt->dst.dev, @@ -1716,7 +1716,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, fl4.flowi4_scope = RT_SCOPE_UNIVERSE; fl4.daddr = daddr; fl4.saddr = saddr; - err = fib_lookup(net, &fl4, &res); + err = fib_lookup(net, &fl4, &res, 0); if (err != 0) { if (!IN_DEV_FORWARD(in_dev)) err = -EHOSTUNREACH; @@ -2123,7 +2123,7 @@ struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4) goto make_route; } - if (fib_lookup(net, fl4, &res)) { + if (fib_lookup(net, fl4, &res, 0)) { res.fi = NULL; res.table = NULL; if (fl4->flowi4_oif) { -- cgit v1.2.3 From 204621551b2a0060a013b92f7add4d5c452fa7cb Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Wed, 24 Jun 2015 11:02:51 +0200 Subject: net: inet_diag: export IPV6_V6ONLY sockopt For AF_INET6 sockets, the value of struct ipv6_pinfo.ipv6only is exported to userspace. It indicates whether a socket bound to in6addr_any listens on IPv4 as well as IPv6. Since the socket is natively IPv6, it is not listed by e.g. 'ss -l -4'. This patch is accompanied by an appropriate one for iproute2 to enable the additional information in 'ss -e'. Signed-off-by: Phil Sutter Signed-off-by: David S. Miller --- include/uapi/linux/inet_diag.h | 3 ++- net/ipv4/inet_diag.c | 4 ++++ 2 files changed, 6 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/inet_diag.h b/include/uapi/linux/inet_diag.h index b629fc53b109..68a1f71fde9f 100644 --- a/include/uapi/linux/inet_diag.h +++ b/include/uapi/linux/inet_diag.h @@ -112,9 +112,10 @@ enum { INET_DIAG_SHUTDOWN, INET_DIAG_DCTCPINFO, INET_DIAG_PROTOCOL, /* response attribute only */ + INET_DIAG_SKV6ONLY, }; -#define INET_DIAG_MAX INET_DIAG_PROTOCOL +#define INET_DIAG_MAX INET_DIAG_SKV6ONLY /* INET_DIAG_MEM */ diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 516a1f6fbdd3..9bc26677058e 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -151,6 +151,10 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, if (nla_put_u8(skb, INET_DIAG_TCLASS, inet6_sk(sk)->tclass) < 0) goto errout; + + if (ipv6_only_sock(sk) && + nla_put_u8(skb, INET_DIAG_SKV6ONLY, 1)) + goto errout; } #endif -- cgit v1.2.3 From 62232e45f4a265abb43f0acf16e58f5d0b6e1ec9 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 8 Jun 2015 14:27:06 -0400 Subject: libnvdimm: control (ioctl) messages for nvdimm_bus and nvdimm devices Most discovery/configuration of the nvdimm-subsystem is done via sysfs attributes. However, some nvdimm_bus instances, particularly the ACPI.NFIT bus, define a small set of messages that can be passed to the platform. For convenience we derive the initial libnvdimm-ioctl command formats directly from the NFIT DSM Interface Example formats. 
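As a hedged aside (not part of the patch): a userspace caller reaches these commands through a per-dimm or per-bus character device ioctl. The struct and ioctl names below come from the new include/uapi/linux/ndctl.h, whose content is not reproduced in this log, and the /dev/nmem0 node name is an assumption about the dimm character device; the visible descriptor table only tells us this command returns three 4-byte output fields.

#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/ndctl.h>

/* Sketch: query the size of a dimm's label area via ND_CMD_GET_CONFIG_SIZE. */
static int read_label_space_size(void)
{
	struct nd_cmd_get_config_size cmd = { 0 };
	int fd = open("/dev/nmem0", O_RDWR);	/* assumed dimm device node */

	if (fd < 0)
		return -1;
	if (ioctl(fd, ND_IOCTL_GET_CONFIG_SIZE, &cmd) < 0) {
		close(fd);
		return -1;
	}
	close(fd);
	return cmd.config_size;
}

The command set introduced by the patch is summarized next.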
ND_CMD_SMART: media health and diagnostics ND_CMD_GET_CONFIG_SIZE: size of the label space ND_CMD_GET_CONFIG_DATA: read label space ND_CMD_SET_CONFIG_DATA: write label space ND_CMD_VENDOR: vendor-specific command passthrough ND_CMD_ARS_CAP: report address-range-scrubbing capabilities ND_CMD_ARS_START: initiate scrubbing ND_CMD_ARS_STATUS: report on scrubbing state ND_CMD_SMART_THRESHOLD: configure alarm thresholds for smart events If a platform later defines different commands than this set it is straightforward to extend support to those formats. Most of the commands target a specific dimm. However, the address-range-scrubbing commands target the bus. The 'commands' attribute in sysfs of an nvdimm_bus, or nvdimm, enumerate the supported commands for that object. Cc: Cc: Robert Moore Cc: Rafael J. Wysocki Reported-by: Nicholas Moulin Acked-by: Christoph Hellwig Signed-off-by: Dan Williams --- drivers/acpi/Kconfig | 12 ++ drivers/acpi/nfit.c | 216 +++++++++++++++++++++++++++++- drivers/acpi/nfit.h | 3 + drivers/nvdimm/bus.c | 326 ++++++++++++++++++++++++++++++++++++++++++++- drivers/nvdimm/core.c | 16 +++ drivers/nvdimm/dimm_devs.c | 38 +++++- drivers/nvdimm/nd-core.h | 3 + include/linux/libnvdimm.h | 27 +++- include/uapi/linux/Kbuild | 1 + include/uapi/linux/ndctl.h | 178 +++++++++++++++++++++++++ 10 files changed, 810 insertions(+), 10 deletions(-) create mode 100644 include/uapi/linux/ndctl.h (limited to 'include/uapi/linux') diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig index 300b4ef3712b..9c43ae301300 100644 --- a/drivers/acpi/Kconfig +++ b/drivers/acpi/Kconfig @@ -397,6 +397,18 @@ config ACPI_NFIT To compile this driver as a module, choose M here: the module will be called nfit. +config ACPI_NFIT_DEBUG + bool "NFIT DSM debug" + depends on ACPI_NFIT + depends on DYNAMIC_DEBUG + default n + help + Enabling this option causes the nfit driver to dump the + input and output buffers of _DSM operations on the ACPI0012 + device and its children. This can be very verbose, so leave + it disabled unless you are debugging a hardware / firmware + issue. + source "drivers/acpi/apei/Kconfig" config ACPI_EXTLOG diff --git a/drivers/acpi/nfit.c b/drivers/acpi/nfit.c index 9fd7781f966e..9112a6210a4b 100644 --- a/drivers/acpi/nfit.c +++ b/drivers/acpi/nfit.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include "nfit.h" @@ -24,11 +25,153 @@ static const u8 *to_nfit_uuid(enum nfit_uuids id) return nfit_uuid[id]; } +static struct acpi_nfit_desc *to_acpi_nfit_desc( + struct nvdimm_bus_descriptor *nd_desc) +{ + return container_of(nd_desc, struct acpi_nfit_desc, nd_desc); +} + +static struct acpi_device *to_acpi_dev(struct acpi_nfit_desc *acpi_desc) +{ + struct nvdimm_bus_descriptor *nd_desc = &acpi_desc->nd_desc; + + /* + * If provider == 'ACPI.NFIT' we can assume 'dev' is a struct + * acpi_device. 
+ */ + if (!nd_desc->provider_name + || strcmp(nd_desc->provider_name, "ACPI.NFIT") != 0) + return NULL; + + return to_acpi_device(acpi_desc->dev); +} + static int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, unsigned int cmd, void *buf, unsigned int buf_len) { - return -ENOTTY; + struct acpi_nfit_desc *acpi_desc = to_acpi_nfit_desc(nd_desc); + const struct nd_cmd_desc *desc = NULL; + union acpi_object in_obj, in_buf, *out_obj; + struct device *dev = acpi_desc->dev; + const char *cmd_name, *dimm_name; + unsigned long dsm_mask; + acpi_handle handle; + const u8 *uuid; + u32 offset; + int rc, i; + + if (nvdimm) { + struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm); + struct acpi_device *adev = nfit_mem->adev; + + if (!adev) + return -ENOTTY; + dimm_name = dev_name(&adev->dev); + cmd_name = nvdimm_cmd_name(cmd); + dsm_mask = nfit_mem->dsm_mask; + desc = nd_cmd_dimm_desc(cmd); + uuid = to_nfit_uuid(NFIT_DEV_DIMM); + handle = adev->handle; + } else { + struct acpi_device *adev = to_acpi_dev(acpi_desc); + + cmd_name = nvdimm_bus_cmd_name(cmd); + dsm_mask = nd_desc->dsm_mask; + desc = nd_cmd_bus_desc(cmd); + uuid = to_nfit_uuid(NFIT_DEV_BUS); + handle = adev->handle; + dimm_name = "bus"; + } + + if (!desc || (cmd && (desc->out_num + desc->in_num == 0))) + return -ENOTTY; + + if (!test_bit(cmd, &dsm_mask)) + return -ENOTTY; + + in_obj.type = ACPI_TYPE_PACKAGE; + in_obj.package.count = 1; + in_obj.package.elements = &in_buf; + in_buf.type = ACPI_TYPE_BUFFER; + in_buf.buffer.pointer = buf; + in_buf.buffer.length = 0; + + /* libnvdimm has already validated the input envelope */ + for (i = 0; i < desc->in_num; i++) + in_buf.buffer.length += nd_cmd_in_size(nvdimm, cmd, desc, + i, buf); + + if (IS_ENABLED(CONFIG_ACPI_NFIT_DEBUG)) { + dev_dbg(dev, "%s:%s cmd: %s input length: %d\n", __func__, + dimm_name, cmd_name, in_buf.buffer.length); + print_hex_dump_debug(cmd_name, DUMP_PREFIX_OFFSET, 4, + 4, in_buf.buffer.pointer, min_t(u32, 128, + in_buf.buffer.length), true); + } + + out_obj = acpi_evaluate_dsm(handle, uuid, 1, cmd, &in_obj); + if (!out_obj) { + dev_dbg(dev, "%s:%s _DSM failed cmd: %s\n", __func__, dimm_name, + cmd_name); + return -EINVAL; + } + + if (out_obj->package.type != ACPI_TYPE_BUFFER) { + dev_dbg(dev, "%s:%s unexpected output object type cmd: %s type: %d\n", + __func__, dimm_name, cmd_name, out_obj->type); + rc = -EINVAL; + goto out; + } + + if (IS_ENABLED(CONFIG_ACPI_NFIT_DEBUG)) { + dev_dbg(dev, "%s:%s cmd: %s output length: %d\n", __func__, + dimm_name, cmd_name, out_obj->buffer.length); + print_hex_dump_debug(cmd_name, DUMP_PREFIX_OFFSET, 4, + 4, out_obj->buffer.pointer, min_t(u32, 128, + out_obj->buffer.length), true); + } + + for (i = 0, offset = 0; i < desc->out_num; i++) { + u32 out_size = nd_cmd_out_size(nvdimm, cmd, desc, i, buf, + (u32 *) out_obj->buffer.pointer); + + if (offset + out_size > out_obj->buffer.length) { + dev_dbg(dev, "%s:%s output object underflow cmd: %s field: %d\n", + __func__, dimm_name, cmd_name, i); + break; + } + + if (in_buf.buffer.length + offset + out_size > buf_len) { + dev_dbg(dev, "%s:%s output overrun cmd: %s field: %d\n", + __func__, dimm_name, cmd_name, i); + rc = -ENXIO; + goto out; + } + memcpy(buf + in_buf.buffer.length + offset, + out_obj->buffer.pointer + offset, out_size); + offset += out_size; + } + if (offset + in_buf.buffer.length < buf_len) { + if (i >= 1) { + /* + * status valid, return the number of bytes left + * unfilled in the output buffer + */ + rc = buf_len - offset - in_buf.buffer.length; + 
} else { + dev_err(dev, "%s:%s underrun cmd: %s buf_len: %d out_len: %d\n", + __func__, dimm_name, cmd_name, buf_len, + offset); + rc = -ENXIO; + } + } else + rc = 0; + + out: + ACPI_FREE(out_obj); + + return rc; } static const char *spa_type_name(u16 type) @@ -489,6 +632,7 @@ static struct attribute_group acpi_nfit_dimm_attribute_group = { }; static const struct attribute_group *acpi_nfit_dimm_attribute_groups[] = { + &nvdimm_attribute_group, &acpi_nfit_dimm_attribute_group, NULL, }; @@ -505,6 +649,50 @@ static struct nvdimm *acpi_nfit_dimm_by_handle(struct acpi_nfit_desc *acpi_desc, return NULL; } +static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc, + struct nfit_mem *nfit_mem, u32 device_handle) +{ + struct acpi_device *adev, *adev_dimm; + struct device *dev = acpi_desc->dev; + const u8 *uuid = to_nfit_uuid(NFIT_DEV_DIMM); + unsigned long long sta; + int i, rc = -ENODEV; + acpi_status status; + + nfit_mem->dsm_mask = acpi_desc->dimm_dsm_force_en; + adev = to_acpi_dev(acpi_desc); + if (!adev) + return 0; + + adev_dimm = acpi_find_child_device(adev, device_handle, false); + nfit_mem->adev = adev_dimm; + if (!adev_dimm) { + dev_err(dev, "no ACPI.NFIT device with _ADR %#x, disabling...\n", + device_handle); + return -ENODEV; + } + + status = acpi_evaluate_integer(adev_dimm->handle, "_STA", NULL, &sta); + if (status == AE_NOT_FOUND) { + dev_dbg(dev, "%s missing _STA, assuming enabled...\n", + dev_name(&adev_dimm->dev)); + rc = 0; + } else if (ACPI_FAILURE(status)) + dev_err(dev, "%s failed to retrieve_STA, disabling...\n", + dev_name(&adev_dimm->dev)); + else if ((sta & ACPI_STA_DEVICE_ENABLED) == 0) + dev_info(dev, "%s disabled by firmware\n", + dev_name(&adev_dimm->dev)); + else + rc = 0; + + for (i = ND_CMD_SMART; i <= ND_CMD_VENDOR; i++) + if (acpi_check_dsm(adev_dimm->handle, uuid, 1, 1ULL << i)) + set_bit(i, &nfit_mem->dsm_mask); + + return rc; +} + static int acpi_nfit_register_dimms(struct acpi_nfit_desc *acpi_desc) { struct nfit_mem *nfit_mem; @@ -513,6 +701,7 @@ static int acpi_nfit_register_dimms(struct acpi_nfit_desc *acpi_desc) struct nvdimm *nvdimm; unsigned long flags = 0; u32 device_handle; + int rc; device_handle = __to_nfit_memdev(nfit_mem)->device_handle; nvdimm = acpi_nfit_dimm_by_handle(acpi_desc, device_handle); @@ -529,8 +718,13 @@ static int acpi_nfit_register_dimms(struct acpi_nfit_desc *acpi_desc) if (nfit_mem->bdw && nfit_mem->memdev_pmem) flags |= NDD_ALIASING; + rc = acpi_nfit_add_dimm(acpi_desc, nfit_mem, device_handle); + if (rc) + continue; + nvdimm = nvdimm_create(acpi_desc->nvdimm_bus, nfit_mem, - acpi_nfit_dimm_attribute_groups, flags); + acpi_nfit_dimm_attribute_groups, + flags, &nfit_mem->dsm_mask); if (!nvdimm) return -ENOMEM; @@ -540,6 +734,22 @@ static int acpi_nfit_register_dimms(struct acpi_nfit_desc *acpi_desc) return 0; } +static void acpi_nfit_init_dsms(struct acpi_nfit_desc *acpi_desc) +{ + struct nvdimm_bus_descriptor *nd_desc = &acpi_desc->nd_desc; + const u8 *uuid = to_nfit_uuid(NFIT_DEV_BUS); + struct acpi_device *adev; + int i; + + adev = to_acpi_dev(acpi_desc); + if (!adev) + return; + + for (i = ND_CMD_ARS_CAP; i <= ND_CMD_ARS_STATUS; i++) + if (acpi_check_dsm(adev->handle, uuid, 1, 1ULL << i)) + set_bit(i, &nd_desc->dsm_mask); +} + static int acpi_nfit_init(struct acpi_nfit_desc *acpi_desc, acpi_size sz) { struct device *dev = acpi_desc->dev; @@ -567,6 +777,8 @@ static int acpi_nfit_init(struct acpi_nfit_desc *acpi_desc, acpi_size sz) if (nfit_mem_init(acpi_desc) != 0) return -ENOMEM; + acpi_nfit_init_dsms(acpi_desc); + return 
acpi_nfit_register_dimms(acpi_desc); } diff --git a/drivers/acpi/nfit.h b/drivers/acpi/nfit.h index 9dd437fe5563..b76e33629098 100644 --- a/drivers/acpi/nfit.h +++ b/drivers/acpi/nfit.h @@ -67,6 +67,8 @@ struct nfit_mem { struct acpi_nfit_system_address *spa_dcr; struct acpi_nfit_system_address *spa_bdw; struct list_head list; + struct acpi_device *adev; + unsigned long dsm_mask; }; struct acpi_nfit_desc { @@ -79,6 +81,7 @@ struct acpi_nfit_desc { struct list_head bdws; struct nvdimm_bus *nvdimm_bus; struct device *dev; + unsigned long dimm_dsm_force_en; }; static inline struct acpi_nfit_memory_map *__to_nfit_memdev( diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c index a8802577fb55..15f3a3ddc225 100644 --- a/drivers/nvdimm/bus.c +++ b/drivers/nvdimm/bus.c @@ -11,14 +11,18 @@ * General Public License for more details. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include #include #include #include +#include #include #include #include +#include #include "nd-core.h" +int nvdimm_major; static int nvdimm_bus_major; static struct class *nd_class; @@ -47,19 +51,325 @@ void nvdimm_bus_destroy_ndctl(struct nvdimm_bus *nvdimm_bus) device_destroy(nd_class, MKDEV(nvdimm_bus_major, nvdimm_bus->id)); } +static const struct nd_cmd_desc __nd_cmd_dimm_descs[] = { + [ND_CMD_IMPLEMENTED] = { }, + [ND_CMD_SMART] = { + .out_num = 2, + .out_sizes = { 4, 8, }, + }, + [ND_CMD_SMART_THRESHOLD] = { + .out_num = 2, + .out_sizes = { 4, 8, }, + }, + [ND_CMD_DIMM_FLAGS] = { + .out_num = 2, + .out_sizes = { 4, 4 }, + }, + [ND_CMD_GET_CONFIG_SIZE] = { + .out_num = 3, + .out_sizes = { 4, 4, 4, }, + }, + [ND_CMD_GET_CONFIG_DATA] = { + .in_num = 2, + .in_sizes = { 4, 4, }, + .out_num = 2, + .out_sizes = { 4, UINT_MAX, }, + }, + [ND_CMD_SET_CONFIG_DATA] = { + .in_num = 3, + .in_sizes = { 4, 4, UINT_MAX, }, + .out_num = 1, + .out_sizes = { 4, }, + }, + [ND_CMD_VENDOR] = { + .in_num = 3, + .in_sizes = { 4, 4, UINT_MAX, }, + .out_num = 3, + .out_sizes = { 4, 4, UINT_MAX, }, + }, +}; + +const struct nd_cmd_desc *nd_cmd_dimm_desc(int cmd) +{ + if (cmd < ARRAY_SIZE(__nd_cmd_dimm_descs)) + return &__nd_cmd_dimm_descs[cmd]; + return NULL; +} +EXPORT_SYMBOL_GPL(nd_cmd_dimm_desc); + +static const struct nd_cmd_desc __nd_cmd_bus_descs[] = { + [ND_CMD_IMPLEMENTED] = { }, + [ND_CMD_ARS_CAP] = { + .in_num = 2, + .in_sizes = { 8, 8, }, + .out_num = 2, + .out_sizes = { 4, 4, }, + }, + [ND_CMD_ARS_START] = { + .in_num = 4, + .in_sizes = { 8, 8, 2, 6, }, + .out_num = 1, + .out_sizes = { 4, }, + }, + [ND_CMD_ARS_STATUS] = { + .out_num = 2, + .out_sizes = { 4, UINT_MAX, }, + }, +}; + +const struct nd_cmd_desc *nd_cmd_bus_desc(int cmd) +{ + if (cmd < ARRAY_SIZE(__nd_cmd_bus_descs)) + return &__nd_cmd_bus_descs[cmd]; + return NULL; +} +EXPORT_SYMBOL_GPL(nd_cmd_bus_desc); + +u32 nd_cmd_in_size(struct nvdimm *nvdimm, int cmd, + const struct nd_cmd_desc *desc, int idx, void *buf) +{ + if (idx >= desc->in_num) + return UINT_MAX; + + if (desc->in_sizes[idx] < UINT_MAX) + return desc->in_sizes[idx]; + + if (nvdimm && cmd == ND_CMD_SET_CONFIG_DATA && idx == 2) { + struct nd_cmd_set_config_hdr *hdr = buf; + + return hdr->in_length; + } else if (nvdimm && cmd == ND_CMD_VENDOR && idx == 2) { + struct nd_cmd_vendor_hdr *hdr = buf; + + return hdr->in_length; + } + + return UINT_MAX; +} +EXPORT_SYMBOL_GPL(nd_cmd_in_size); + +u32 nd_cmd_out_size(struct nvdimm *nvdimm, int cmd, + const struct nd_cmd_desc *desc, int idx, const u32 *in_field, + const u32 *out_field) +{ + if (idx >= desc->out_num) + return UINT_MAX; + + if (desc->out_sizes[idx] < 
UINT_MAX) + return desc->out_sizes[idx]; + + if (nvdimm && cmd == ND_CMD_GET_CONFIG_DATA && idx == 1) + return in_field[1]; + else if (nvdimm && cmd == ND_CMD_VENDOR && idx == 2) + return out_field[1]; + else if (!nvdimm && cmd == ND_CMD_ARS_STATUS && idx == 1) + return ND_CMD_ARS_STATUS_MAX; + + return UINT_MAX; +} +EXPORT_SYMBOL_GPL(nd_cmd_out_size); + +static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm, + int read_only, unsigned int ioctl_cmd, unsigned long arg) +{ + struct nvdimm_bus_descriptor *nd_desc = nvdimm_bus->nd_desc; + size_t buf_len = 0, in_len = 0, out_len = 0; + static char out_env[ND_CMD_MAX_ENVELOPE]; + static char in_env[ND_CMD_MAX_ENVELOPE]; + const struct nd_cmd_desc *desc = NULL; + unsigned int cmd = _IOC_NR(ioctl_cmd); + void __user *p = (void __user *) arg; + struct device *dev = &nvdimm_bus->dev; + const char *cmd_name, *dimm_name; + unsigned long dsm_mask; + void *buf; + int rc, i; + + if (nvdimm) { + desc = nd_cmd_dimm_desc(cmd); + cmd_name = nvdimm_cmd_name(cmd); + dsm_mask = nvdimm->dsm_mask ? *(nvdimm->dsm_mask) : 0; + dimm_name = dev_name(&nvdimm->dev); + } else { + desc = nd_cmd_bus_desc(cmd); + cmd_name = nvdimm_bus_cmd_name(cmd); + dsm_mask = nd_desc->dsm_mask; + dimm_name = "bus"; + } + + if (!desc || (desc->out_num + desc->in_num == 0) || + !test_bit(cmd, &dsm_mask)) + return -ENOTTY; + + /* fail write commands (when read-only) */ + if (read_only) + switch (ioctl_cmd) { + case ND_IOCTL_VENDOR: + case ND_IOCTL_SET_CONFIG_DATA: + case ND_IOCTL_ARS_START: + dev_dbg(&nvdimm_bus->dev, "'%s' command while read-only.\n", + nvdimm ? nvdimm_cmd_name(cmd) + : nvdimm_bus_cmd_name(cmd)); + return -EPERM; + default: + break; + } + + /* process an input envelope */ + for (i = 0; i < desc->in_num; i++) { + u32 in_size, copy; + + in_size = nd_cmd_in_size(nvdimm, cmd, desc, i, in_env); + if (in_size == UINT_MAX) { + dev_err(dev, "%s:%s unknown input size cmd: %s field: %d\n", + __func__, dimm_name, cmd_name, i); + return -ENXIO; + } + if (!access_ok(VERIFY_READ, p + in_len, in_size)) + return -EFAULT; + if (in_len < sizeof(in_env)) + copy = min_t(u32, sizeof(in_env) - in_len, in_size); + else + copy = 0; + if (copy && copy_from_user(&in_env[in_len], p + in_len, copy)) + return -EFAULT; + in_len += in_size; + } + + /* process an output envelope */ + for (i = 0; i < desc->out_num; i++) { + u32 out_size = nd_cmd_out_size(nvdimm, cmd, desc, i, + (u32 *) in_env, (u32 *) out_env); + u32 copy; + + if (out_size == UINT_MAX) { + dev_dbg(dev, "%s:%s unknown output size cmd: %s field: %d\n", + __func__, dimm_name, cmd_name, i); + return -EFAULT; + } + if (!access_ok(VERIFY_WRITE, p + in_len + out_len, out_size)) + return -EFAULT; + if (out_len < sizeof(out_env)) + copy = min_t(u32, sizeof(out_env) - out_len, out_size); + else + copy = 0; + if (copy && copy_from_user(&out_env[out_len], + p + in_len + out_len, copy)) + return -EFAULT; + out_len += out_size; + } + + buf_len = out_len + in_len; + if (!access_ok(VERIFY_WRITE, p, sizeof(buf_len))) + return -EFAULT; + + if (buf_len > ND_IOCTL_MAX_BUFLEN) { + dev_dbg(dev, "%s:%s cmd: %s buf_len: %zu > %d\n", __func__, + dimm_name, cmd_name, buf_len, + ND_IOCTL_MAX_BUFLEN); + return -EINVAL; + } + + buf = vmalloc(buf_len); + if (!buf) + return -ENOMEM; + + if (copy_from_user(buf, p, buf_len)) { + rc = -EFAULT; + goto out; + } + + rc = nd_desc->ndctl(nd_desc, nvdimm, cmd, buf, buf_len); + if (rc < 0) + goto out; + if (copy_to_user(p, buf, buf_len)) + rc = -EFAULT; + out: + vfree(buf); + return rc; +} + static long 
nd_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { - return -ENXIO; + long id = (long) file->private_data; + int rc = -ENXIO, read_only; + struct nvdimm_bus *nvdimm_bus; + + read_only = (O_RDWR != (file->f_flags & O_ACCMODE)); + mutex_lock(&nvdimm_bus_list_mutex); + list_for_each_entry(nvdimm_bus, &nvdimm_bus_list, list) { + if (nvdimm_bus->id == id) { + rc = __nd_ioctl(nvdimm_bus, NULL, read_only, cmd, arg); + break; + } + } + mutex_unlock(&nvdimm_bus_list_mutex); + + return rc; +} + +static int match_dimm(struct device *dev, void *data) +{ + long id = (long) data; + + if (is_nvdimm(dev)) { + struct nvdimm *nvdimm = to_nvdimm(dev); + + return nvdimm->id == id; + } + + return 0; +} + +static long nvdimm_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +{ + int rc = -ENXIO, read_only; + struct nvdimm_bus *nvdimm_bus; + + read_only = (O_RDWR != (file->f_flags & O_ACCMODE)); + mutex_lock(&nvdimm_bus_list_mutex); + list_for_each_entry(nvdimm_bus, &nvdimm_bus_list, list) { + struct device *dev = device_find_child(&nvdimm_bus->dev, + file->private_data, match_dimm); + struct nvdimm *nvdimm; + + if (!dev) + continue; + + nvdimm = to_nvdimm(dev); + rc = __nd_ioctl(nvdimm_bus, nvdimm, read_only, cmd, arg); + put_device(dev); + break; + } + mutex_unlock(&nvdimm_bus_list_mutex); + + return rc; +} + +static int nd_open(struct inode *inode, struct file *file) +{ + long minor = iminor(inode); + + file->private_data = (void *) minor; + return 0; } static const struct file_operations nvdimm_bus_fops = { .owner = THIS_MODULE, - .open = nonseekable_open, + .open = nd_open, .unlocked_ioctl = nd_ioctl, .compat_ioctl = nd_ioctl, .llseek = noop_llseek, }; +static const struct file_operations nvdimm_fops = { + .owner = THIS_MODULE, + .open = nd_open, + .unlocked_ioctl = nvdimm_ioctl, + .compat_ioctl = nvdimm_ioctl, + .llseek = noop_llseek, +}; + int __init nvdimm_bus_init(void) { int rc; @@ -70,9 +380,14 @@ int __init nvdimm_bus_init(void) rc = register_chrdev(0, "ndctl", &nvdimm_bus_fops); if (rc < 0) - goto err_chrdev; + goto err_bus_chrdev; nvdimm_bus_major = rc; + rc = register_chrdev(0, "dimmctl", &nvdimm_fops); + if (rc < 0) + goto err_dimm_chrdev; + nvdimm_major = rc; + nd_class = class_create(THIS_MODULE, "nd"); if (IS_ERR(nd_class)) goto err_class; @@ -80,8 +395,10 @@ int __init nvdimm_bus_init(void) return 0; err_class: + unregister_chrdev(nvdimm_major, "dimmctl"); + err_dimm_chrdev: unregister_chrdev(nvdimm_bus_major, "ndctl"); - err_chrdev: + err_bus_chrdev: bus_unregister(&nvdimm_bus_type); return rc; @@ -91,5 +408,6 @@ void __exit nvdimm_bus_exit(void) { class_destroy(nd_class); unregister_chrdev(nvdimm_bus_major, "ndctl"); + unregister_chrdev(nvdimm_major, "dimmctl"); bus_unregister(&nvdimm_bus_type); } diff --git a/drivers/nvdimm/core.c b/drivers/nvdimm/core.c index ef957eb37c90..1ce159095c52 100644 --- a/drivers/nvdimm/core.c +++ b/drivers/nvdimm/core.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include "nd-core.h" @@ -61,6 +62,20 @@ struct nvdimm_bus *walk_to_nvdimm_bus(struct device *nd_dev) return NULL; } +static ssize_t commands_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + int cmd, len = 0; + struct nvdimm_bus *nvdimm_bus = to_nvdimm_bus(dev); + struct nvdimm_bus_descriptor *nd_desc = nvdimm_bus->nd_desc; + + for_each_set_bit(cmd, &nd_desc->dsm_mask, BITS_PER_LONG) + len += sprintf(buf + len, "%s ", nvdimm_bus_cmd_name(cmd)); + len += sprintf(buf + len, "\n"); + return len; +} +static 
DEVICE_ATTR_RO(commands); + static const char *nvdimm_bus_provider(struct nvdimm_bus *nvdimm_bus) { struct nvdimm_bus_descriptor *nd_desc = nvdimm_bus->nd_desc; @@ -84,6 +99,7 @@ static ssize_t provider_show(struct device *dev, static DEVICE_ATTR_RO(provider); static struct attribute *nvdimm_bus_attributes[] = { + &dev_attr_commands.attr, &dev_attr_provider.attr, NULL, }; diff --git a/drivers/nvdimm/dimm_devs.c b/drivers/nvdimm/dimm_devs.c index 51ea52cc2079..c3dd7227d1bb 100644 --- a/drivers/nvdimm/dimm_devs.c +++ b/drivers/nvdimm/dimm_devs.c @@ -12,6 +12,7 @@ */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include +#include #include #include #include @@ -33,7 +34,7 @@ static struct device_type nvdimm_device_type = { .release = nvdimm_release, }; -static bool is_nvdimm(struct device *dev) +bool is_nvdimm(struct device *dev) { return dev->type == &nvdimm_device_type; } @@ -55,12 +56,41 @@ EXPORT_SYMBOL_GPL(nvdimm_name); void *nvdimm_provider_data(struct nvdimm *nvdimm) { - return nvdimm->provider_data; + if (nvdimm) + return nvdimm->provider_data; + return NULL; } EXPORT_SYMBOL_GPL(nvdimm_provider_data); +static ssize_t commands_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nvdimm *nvdimm = to_nvdimm(dev); + int cmd, len = 0; + + if (!nvdimm->dsm_mask) + return sprintf(buf, "\n"); + + for_each_set_bit(cmd, nvdimm->dsm_mask, BITS_PER_LONG) + len += sprintf(buf + len, "%s ", nvdimm_cmd_name(cmd)); + len += sprintf(buf + len, "\n"); + return len; +} +static DEVICE_ATTR_RO(commands); + +static struct attribute *nvdimm_attributes[] = { + &dev_attr_commands.attr, + NULL, +}; + +struct attribute_group nvdimm_attribute_group = { + .attrs = nvdimm_attributes, +}; +EXPORT_SYMBOL_GPL(nvdimm_attribute_group); + struct nvdimm *nvdimm_create(struct nvdimm_bus *nvdimm_bus, void *provider_data, - const struct attribute_group **groups, unsigned long flags) + const struct attribute_group **groups, unsigned long flags, + unsigned long *dsm_mask) { struct nvdimm *nvdimm = kzalloc(sizeof(*nvdimm), GFP_KERNEL); struct device *dev; @@ -75,12 +105,14 @@ struct nvdimm *nvdimm_create(struct nvdimm_bus *nvdimm_bus, void *provider_data, } nvdimm->provider_data = provider_data; nvdimm->flags = flags; + nvdimm->dsm_mask = dsm_mask; dev = &nvdimm->dev; dev_set_name(dev, "nmem%d", nvdimm->id); dev->parent = &nvdimm_bus->dev; dev->type = &nvdimm_device_type; dev->bus = &nvdimm_bus_type; + dev->devt = MKDEV(nvdimm_major, nvdimm->id); dev->groups = groups; if (device_register(dev) != 0) { put_device(dev); diff --git a/drivers/nvdimm/nd-core.h b/drivers/nvdimm/nd-core.h index 9b8303413b60..59528b3c9de8 100644 --- a/drivers/nvdimm/nd-core.h +++ b/drivers/nvdimm/nd-core.h @@ -18,6 +18,7 @@ extern struct list_head nvdimm_bus_list; extern struct mutex nvdimm_bus_list_mutex; extern struct bus_type nvdimm_bus_type; +extern int nvdimm_major; struct nvdimm_bus { struct nvdimm_bus_descriptor *nd_desc; @@ -29,10 +30,12 @@ struct nvdimm_bus { struct nvdimm { unsigned long flags; void *provider_data; + unsigned long *dsm_mask; struct device dev; int id; }; +bool is_nvdimm(struct device *dev); struct nvdimm_bus *walk_to_nvdimm_bus(struct device *nd_dev); int __init nvdimm_bus_init(void); void __exit nvdimm_bus_exit(void); diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h index 07787f0dd7de..a39235819af3 100644 --- a/include/linux/libnvdimm.h +++ b/include/linux/libnvdimm.h @@ -14,13 +14,22 @@ */ #ifndef __LIBNVDIMM_H__ #define __LIBNVDIMM_H__ +#include +#include enum { /* when a dimm 
supports both PMEM and BLK access a label is required */ NDD_ALIASING = 1 << 0, + + /* need to set a limit somewhere, but yes, this is likely overkill */ + ND_IOCTL_MAX_BUFLEN = SZ_4M, + ND_CMD_MAX_ELEM = 4, + ND_CMD_MAX_ENVELOPE = 16, + ND_CMD_ARS_STATUS_MAX = SZ_4K, }; extern struct attribute_group nvdimm_bus_attribute_group; +extern struct attribute_group nvdimm_attribute_group; struct nvdimm; struct nvdimm_bus_descriptor; @@ -35,6 +44,14 @@ struct nvdimm_bus_descriptor { ndctl_fn ndctl; }; +struct nd_cmd_desc { + int in_num; + int out_num; + u32 in_sizes[ND_CMD_MAX_ELEM]; + int out_sizes[ND_CMD_MAX_ELEM]; +}; + +struct nvdimm_bus; struct device; struct nvdimm_bus *nvdimm_bus_register(struct device *parent, struct nvdimm_bus_descriptor *nfit_desc); @@ -45,5 +62,13 @@ struct nvdimm_bus_descriptor *to_nd_desc(struct nvdimm_bus *nvdimm_bus); const char *nvdimm_name(struct nvdimm *nvdimm); void *nvdimm_provider_data(struct nvdimm *nvdimm); struct nvdimm *nvdimm_create(struct nvdimm_bus *nvdimm_bus, void *provider_data, - const struct attribute_group **groups, unsigned long flags); + const struct attribute_group **groups, unsigned long flags, + unsigned long *dsm_mask); +const struct nd_cmd_desc *nd_cmd_dimm_desc(int cmd); +const struct nd_cmd_desc *nd_cmd_bus_desc(int cmd); +u32 nd_cmd_in_size(struct nvdimm *nvdimm, int cmd, + const struct nd_cmd_desc *desc, int idx, void *buf); +u32 nd_cmd_out_size(struct nvdimm *nvdimm, int cmd, + const struct nd_cmd_desc *desc, int idx, const u32 *in_field, + const u32 *out_field); #endif /* __LIBNVDIMM_H__ */ diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild index 1a0006a76b00..200cc5ea2998 100644 --- a/include/uapi/linux/Kbuild +++ b/include/uapi/linux/Kbuild @@ -271,6 +271,7 @@ header-y += ncp_fs.h header-y += ncp.h header-y += ncp_mount.h header-y += ncp_no.h +header-y += ndctl.h header-y += neighbour.h header-y += netconf.h header-y += netdevice.h diff --git a/include/uapi/linux/ndctl.h b/include/uapi/linux/ndctl.h new file mode 100644 index 000000000000..ff13c23b26df --- /dev/null +++ b/include/uapi/linux/ndctl.h @@ -0,0 +1,178 @@ +/* + * Copyright (c) 2014-2015, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU Lesser General Public License, + * version 2.1, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for + * more details. 
+ */ +#ifndef __NDCTL_H__ +#define __NDCTL_H__ + +#include + +struct nd_cmd_smart { + __u32 status; + __u8 data[128]; +} __packed; + +struct nd_cmd_smart_threshold { + __u32 status; + __u8 data[8]; +} __packed; + +struct nd_cmd_dimm_flags { + __u32 status; + __u32 flags; +} __packed; + +struct nd_cmd_get_config_size { + __u32 status; + __u32 config_size; + __u32 max_xfer; +} __packed; + +struct nd_cmd_get_config_data_hdr { + __u32 in_offset; + __u32 in_length; + __u32 status; + __u8 out_buf[0]; +} __packed; + +struct nd_cmd_set_config_hdr { + __u32 in_offset; + __u32 in_length; + __u8 in_buf[0]; +} __packed; + +struct nd_cmd_vendor_hdr { + __u32 opcode; + __u32 in_length; + __u8 in_buf[0]; +} __packed; + +struct nd_cmd_vendor_tail { + __u32 status; + __u32 out_length; + __u8 out_buf[0]; +} __packed; + +struct nd_cmd_ars_cap { + __u64 address; + __u64 length; + __u32 status; + __u32 max_ars_out; +} __packed; + +struct nd_cmd_ars_start { + __u64 address; + __u64 length; + __u16 type; + __u8 reserved[6]; + __u32 status; +} __packed; + +struct nd_cmd_ars_status { + __u32 status; + __u32 out_length; + __u64 address; + __u64 length; + __u16 type; + __u32 num_records; + struct nd_ars_record { + __u32 handle; + __u32 flags; + __u64 err_address; + __u64 mask; + } __packed records[0]; +} __packed; + +enum { + ND_CMD_IMPLEMENTED = 0, + + /* bus commands */ + ND_CMD_ARS_CAP = 1, + ND_CMD_ARS_START = 2, + ND_CMD_ARS_STATUS = 3, + + /* per-dimm commands */ + ND_CMD_SMART = 1, + ND_CMD_SMART_THRESHOLD = 2, + ND_CMD_DIMM_FLAGS = 3, + ND_CMD_GET_CONFIG_SIZE = 4, + ND_CMD_GET_CONFIG_DATA = 5, + ND_CMD_SET_CONFIG_DATA = 6, + ND_CMD_VENDOR_EFFECT_LOG_SIZE = 7, + ND_CMD_VENDOR_EFFECT_LOG = 8, + ND_CMD_VENDOR = 9, +}; + +static inline const char *nvdimm_bus_cmd_name(unsigned cmd) +{ + static const char * const names[] = { + [ND_CMD_ARS_CAP] = "ars_cap", + [ND_CMD_ARS_START] = "ars_start", + [ND_CMD_ARS_STATUS] = "ars_status", + }; + + if (cmd < ARRAY_SIZE(names) && names[cmd]) + return names[cmd]; + return "unknown"; +} + +static inline const char *nvdimm_cmd_name(unsigned cmd) +{ + static const char * const names[] = { + [ND_CMD_SMART] = "smart", + [ND_CMD_SMART_THRESHOLD] = "smart_thresh", + [ND_CMD_DIMM_FLAGS] = "flags", + [ND_CMD_GET_CONFIG_SIZE] = "get_size", + [ND_CMD_GET_CONFIG_DATA] = "get_data", + [ND_CMD_SET_CONFIG_DATA] = "set_data", + [ND_CMD_VENDOR_EFFECT_LOG_SIZE] = "effect_size", + [ND_CMD_VENDOR_EFFECT_LOG] = "effect_log", + [ND_CMD_VENDOR] = "vendor", + }; + + if (cmd < ARRAY_SIZE(names) && names[cmd]) + return names[cmd]; + return "unknown"; +} + +#define ND_IOCTL 'N' + +#define ND_IOCTL_SMART _IOWR(ND_IOCTL, ND_CMD_SMART,\ + struct nd_cmd_smart) + +#define ND_IOCTL_SMART_THRESHOLD _IOWR(ND_IOCTL, ND_CMD_SMART_THRESHOLD,\ + struct nd_cmd_smart_threshold) + +#define ND_IOCTL_DIMM_FLAGS _IOWR(ND_IOCTL, ND_CMD_DIMM_FLAGS,\ + struct nd_cmd_dimm_flags) + +#define ND_IOCTL_GET_CONFIG_SIZE _IOWR(ND_IOCTL, ND_CMD_GET_CONFIG_SIZE,\ + struct nd_cmd_get_config_size) + +#define ND_IOCTL_GET_CONFIG_DATA _IOWR(ND_IOCTL, ND_CMD_GET_CONFIG_DATA,\ + struct nd_cmd_get_config_data_hdr) + +#define ND_IOCTL_SET_CONFIG_DATA _IOWR(ND_IOCTL, ND_CMD_SET_CONFIG_DATA,\ + struct nd_cmd_set_config_hdr) + +#define ND_IOCTL_VENDOR _IOWR(ND_IOCTL, ND_CMD_VENDOR,\ + struct nd_cmd_vendor_hdr) + +#define ND_IOCTL_ARS_CAP _IOWR(ND_IOCTL, ND_CMD_ARS_CAP,\ + struct nd_cmd_ars_cap) + +#define ND_IOCTL_ARS_START _IOWR(ND_IOCTL, ND_CMD_ARS_START,\ + struct nd_cmd_ars_start) + +#define ND_IOCTL_ARS_STATUS _IOWR(ND_IOCTL, 
ND_CMD_ARS_STATUS,\ + struct nd_cmd_ars_status) + +#endif /* __NDCTL_H__ */ -- cgit v1.2.3 From 4d88a97aa9e8cfa6460aab119c5da60ad2267423 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Sun, 31 May 2015 14:41:48 -0400 Subject: libnvdimm, nvdimm: dimm driver and base libnvdimm device-driver infrastructure * Implement the device-model infrastructure for loading modules and attaching drivers to nvdimm devices. This is a simple association of a nd-device-type number with a driver that has a bitmask of supported device types. To facilitate userspace bind/unbind operations 'modalias' and 'devtype', that also appear in the uevent, are added as generic sysfs attributes for all nvdimm devices. The reason for the device-type number is to support sub-types within a given parent devtype, be it a vendor-specific sub-type or otherwise. * The first consumer of this infrastructure is the driver for dimm devices. It simply uses control messages to retrieve and store the configuration-data image (label set) from each dimm. Note: nd_device_register() arranges for asynchronous registration of nvdimm bus devices by default. Cc: Greg KH Cc: Neil Brown Acked-by: Christoph Hellwig Tested-by: Toshi Kani Signed-off-by: Dan Williams --- drivers/acpi/nfit.c | 13 +++- drivers/nvdimm/Makefile | 1 + drivers/nvdimm/bus.c | 168 ++++++++++++++++++++++++++++++++++++++++++++- drivers/nvdimm/core.c | 43 +++++++++++- drivers/nvdimm/dimm.c | 92 +++++++++++++++++++++++++ drivers/nvdimm/dimm_devs.c | 136 ++++++++++++++++++++++++++++++++++-- drivers/nvdimm/nd-core.h | 6 +- drivers/nvdimm/nd.h | 36 ++++++++++ include/linux/libnvdimm.h | 2 + include/linux/nd.h | 39 +++++++++++ include/uapi/linux/ndctl.h | 6 ++ 11 files changed, 527 insertions(+), 15 deletions(-) create mode 100644 drivers/nvdimm/dimm.c create mode 100644 drivers/nvdimm/nd.h create mode 100644 include/linux/nd.h (limited to 'include/uapi/linux') diff --git a/drivers/acpi/nfit.c b/drivers/acpi/nfit.c index 9112a6210a4b..c4ccec1bc60b 100644 --- a/drivers/acpi/nfit.c +++ b/drivers/acpi/nfit.c @@ -18,6 +18,10 @@ #include #include "nfit.h" +static bool force_enable_dimms; +module_param(force_enable_dimms, bool, S_IRUGO|S_IWUSR); +MODULE_PARM_DESC(force_enable_dimms, "Ignore _STA (ACPI DIMM device) status"); + static u8 nfit_uuid[NFIT_UUID_MAX][16]; static const u8 *to_nfit_uuid(enum nfit_uuids id) @@ -633,6 +637,7 @@ static struct attribute_group acpi_nfit_dimm_attribute_group = { static const struct attribute_group *acpi_nfit_dimm_attribute_groups[] = { &nvdimm_attribute_group, + &nd_device_attribute_group, &acpi_nfit_dimm_attribute_group, NULL, }; @@ -669,7 +674,7 @@ static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc, if (!adev_dimm) { dev_err(dev, "no ACPI.NFIT device with _ADR %#x, disabling...\n", device_handle); - return -ENODEV; + return force_enable_dimms ? 0 : -ENODEV; } status = acpi_evaluate_integer(adev_dimm->handle, "_STA", NULL, &sta); @@ -690,12 +695,13 @@ static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc, if (acpi_check_dsm(adev_dimm->handle, uuid, 1, 1ULL << i)) set_bit(i, &nfit_mem->dsm_mask); - return rc; + return force_enable_dimms ? 
0 : rc; } static int acpi_nfit_register_dimms(struct acpi_nfit_desc *acpi_desc) { struct nfit_mem *nfit_mem; + int dimm_count = 0; list_for_each_entry(nfit_mem, &acpi_desc->dimms, list) { struct nvdimm *nvdimm; @@ -729,9 +735,10 @@ static int acpi_nfit_register_dimms(struct acpi_nfit_desc *acpi_desc) return -ENOMEM; nfit_mem->nvdimm = nvdimm; + dimm_count++; } - return 0; + return nvdimm_bus_check_dimm_count(acpi_desc->nvdimm_bus, dimm_count); } static void acpi_nfit_init_dsms(struct acpi_nfit_desc *acpi_desc) diff --git a/drivers/nvdimm/Makefile b/drivers/nvdimm/Makefile index 5b68738ba406..d44b5c1fcd3b 100644 --- a/drivers/nvdimm/Makefile +++ b/drivers/nvdimm/Makefile @@ -3,3 +3,4 @@ obj-$(CONFIG_LIBNVDIMM) += libnvdimm.o libnvdimm-y := core.o libnvdimm-y += bus.o libnvdimm-y += dimm_devs.o +libnvdimm-y += dimm.o diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c index 15f3a3ddc225..a0308f1872bf 100644 --- a/drivers/nvdimm/bus.c +++ b/drivers/nvdimm/bus.c @@ -16,19 +16,183 @@ #include #include #include +#include #include #include #include #include +#include #include "nd-core.h" +#include "nd.h" int nvdimm_major; static int nvdimm_bus_major; static struct class *nd_class; -struct bus_type nvdimm_bus_type = { +static int to_nd_device_type(struct device *dev) +{ + if (is_nvdimm(dev)) + return ND_DEVICE_DIMM; + + return 0; +} + +static int nvdimm_bus_uevent(struct device *dev, struct kobj_uevent_env *env) +{ + return add_uevent_var(env, "MODALIAS=" ND_DEVICE_MODALIAS_FMT, + to_nd_device_type(dev)); +} + +static int nvdimm_bus_match(struct device *dev, struct device_driver *drv) +{ + struct nd_device_driver *nd_drv = to_nd_device_driver(drv); + + return test_bit(to_nd_device_type(dev), &nd_drv->type); +} + +static int nvdimm_bus_probe(struct device *dev) +{ + struct nd_device_driver *nd_drv = to_nd_device_driver(dev->driver); + struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev); + int rc; + + rc = nd_drv->probe(dev); + dev_dbg(&nvdimm_bus->dev, "%s.probe(%s) = %d\n", dev->driver->name, + dev_name(dev), rc); + return rc; +} + +static int nvdimm_bus_remove(struct device *dev) +{ + struct nd_device_driver *nd_drv = to_nd_device_driver(dev->driver); + struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev); + int rc; + + rc = nd_drv->remove(dev); + dev_dbg(&nvdimm_bus->dev, "%s.remove(%s) = %d\n", dev->driver->name, + dev_name(dev), rc); + return rc; +} + +static struct bus_type nvdimm_bus_type = { .name = "nd", + .uevent = nvdimm_bus_uevent, + .match = nvdimm_bus_match, + .probe = nvdimm_bus_probe, + .remove = nvdimm_bus_remove, +}; + +static ASYNC_DOMAIN_EXCLUSIVE(nd_async_domain); + +void nd_synchronize(void) +{ + async_synchronize_full_domain(&nd_async_domain); +} +EXPORT_SYMBOL_GPL(nd_synchronize); + +static void nd_async_device_register(void *d, async_cookie_t cookie) +{ + struct device *dev = d; + + if (device_add(dev) != 0) { + dev_err(dev, "%s: failed\n", __func__); + put_device(dev); + } + put_device(dev); +} + +static void nd_async_device_unregister(void *d, async_cookie_t cookie) +{ + struct device *dev = d; + + device_unregister(dev); + put_device(dev); +} + +void nd_device_register(struct device *dev) +{ + dev->bus = &nvdimm_bus_type; + device_initialize(dev); + get_device(dev); + async_schedule_domain(nd_async_device_register, dev, + &nd_async_domain); +} +EXPORT_SYMBOL(nd_device_register); + +void nd_device_unregister(struct device *dev, enum nd_async_mode mode) +{ + switch (mode) { + case ND_ASYNC: + get_device(dev); + async_schedule_domain(nd_async_device_unregister, 
dev, + &nd_async_domain); + break; + case ND_SYNC: + nd_synchronize(); + device_unregister(dev); + break; + } +} +EXPORT_SYMBOL(nd_device_unregister); + +/** + * __nd_driver_register() - register a region or a namespace driver + * @nd_drv: driver to register + * @owner: automatically set by nd_driver_register() macro + * @mod_name: automatically set by nd_driver_register() macro + */ +int __nd_driver_register(struct nd_device_driver *nd_drv, struct module *owner, + const char *mod_name) +{ + struct device_driver *drv = &nd_drv->drv; + + if (!nd_drv->type) { + pr_debug("driver type bitmask not set (%pf)\n", + __builtin_return_address(0)); + return -EINVAL; + } + + if (!nd_drv->probe || !nd_drv->remove) { + pr_debug("->probe() and ->remove() must be specified\n"); + return -EINVAL; + } + + drv->bus = &nvdimm_bus_type; + drv->owner = owner; + drv->mod_name = mod_name; + + return driver_register(drv); +} +EXPORT_SYMBOL(__nd_driver_register); + +static ssize_t modalias_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + return sprintf(buf, ND_DEVICE_MODALIAS_FMT "\n", + to_nd_device_type(dev)); +} +static DEVICE_ATTR_RO(modalias); + +static ssize_t devtype_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + return sprintf(buf, "%s\n", dev->type->name); +} +static DEVICE_ATTR_RO(devtype); + +static struct attribute *nd_device_attributes[] = { + &dev_attr_modalias.attr, + &dev_attr_devtype.attr, + NULL, +}; + +/** + * nd_device_attribute_group - generic attributes for all devices on an nd bus + */ +struct attribute_group nd_device_attribute_group = { + .attrs = nd_device_attributes, }; +EXPORT_SYMBOL_GPL(nd_device_attribute_group); int nvdimm_bus_create_ndctl(struct nvdimm_bus *nvdimm_bus) { @@ -404,7 +568,7 @@ int __init nvdimm_bus_init(void) return rc; } -void __exit nvdimm_bus_exit(void) +void nvdimm_bus_exit(void) { class_destroy(nd_class); unregister_chrdev(nvdimm_bus_major, "ndctl"); diff --git a/drivers/nvdimm/core.c b/drivers/nvdimm/core.c index 1ce159095c52..50ab880f0dc0 100644 --- a/drivers/nvdimm/core.c +++ b/drivers/nvdimm/core.c @@ -18,6 +18,7 @@ #include #include #include "nd-core.h" +#include "nd.h" LIST_HEAD(nvdimm_bus_list); DEFINE_MUTEX(nvdimm_bus_list_mutex); @@ -98,8 +99,33 @@ static ssize_t provider_show(struct device *dev, } static DEVICE_ATTR_RO(provider); +static int flush_namespaces(struct device *dev, void *data) +{ + device_lock(dev); + device_unlock(dev); + return 0; +} + +static int flush_regions_dimms(struct device *dev, void *data) +{ + device_lock(dev); + device_unlock(dev); + device_for_each_child(dev, NULL, flush_namespaces); + return 0; +} + +static ssize_t wait_probe_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + nd_synchronize(); + device_for_each_child(dev, NULL, flush_regions_dimms); + return sprintf(buf, "1\n"); +} +static DEVICE_ATTR_RO(wait_probe); + static struct attribute *nvdimm_bus_attributes[] = { &dev_attr_commands.attr, + &dev_attr_wait_probe.attr, &dev_attr_provider.attr, NULL, }; @@ -161,7 +187,7 @@ static int child_unregister(struct device *dev, void *data) if (dev->class) /* pass */; else - device_unregister(dev); + nd_device_unregister(dev, ND_SYNC); return 0; } @@ -174,6 +200,7 @@ void nvdimm_bus_unregister(struct nvdimm_bus *nvdimm_bus) list_del_init(&nvdimm_bus->list); mutex_unlock(&nvdimm_bus_list_mutex); + nd_synchronize(); device_for_each_child(&nvdimm_bus->dev, NULL, child_unregister); nvdimm_bus_destroy_ndctl(nvdimm_bus); @@ -183,12 +210,24 @@ 
EXPORT_SYMBOL_GPL(nvdimm_bus_unregister); static __init int libnvdimm_init(void) { - return nvdimm_bus_init(); + int rc; + + rc = nvdimm_bus_init(); + if (rc) + return rc; + rc = nvdimm_init(); + if (rc) + goto err_dimm; + return 0; + err_dimm: + nvdimm_bus_exit(); + return rc; } static __exit void libnvdimm_exit(void) { WARN_ON(!list_empty(&nvdimm_bus_list)); + nvdimm_exit(); nvdimm_bus_exit(); } diff --git a/drivers/nvdimm/dimm.c b/drivers/nvdimm/dimm.c new file mode 100644 index 000000000000..28001a6ccd4e --- /dev/null +++ b/drivers/nvdimm/dimm.c @@ -0,0 +1,92 @@ +/* + * Copyright(c) 2013-2015 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include "nd.h" + +static void free_data(struct nvdimm_drvdata *ndd) +{ + if (!ndd) + return; + + if (ndd->data && is_vmalloc_addr(ndd->data)) + vfree(ndd->data); + else + kfree(ndd->data); + kfree(ndd); +} + +static int nvdimm_probe(struct device *dev) +{ + struct nvdimm_drvdata *ndd; + int rc; + + ndd = kzalloc(sizeof(*ndd), GFP_KERNEL); + if (!ndd) + return -ENOMEM; + + dev_set_drvdata(dev, ndd); + ndd->dev = dev; + + rc = nvdimm_init_nsarea(ndd); + if (rc) + goto err; + + rc = nvdimm_init_config_data(ndd); + if (rc) + goto err; + + dev_dbg(dev, "config data size: %d\n", ndd->nsarea.config_size); + + return 0; + + err: + free_data(ndd); + return rc; +} + +static int nvdimm_remove(struct device *dev) +{ + struct nvdimm_drvdata *ndd = dev_get_drvdata(dev); + + free_data(ndd); + + return 0; +} + +static struct nd_device_driver nvdimm_driver = { + .probe = nvdimm_probe, + .remove = nvdimm_remove, + .drv = { + .name = "nvdimm", + }, + .type = ND_DRIVER_DIMM, +}; + +int __init nvdimm_init(void) +{ + return nd_driver_register(&nvdimm_driver); +} + +void __exit nvdimm_exit(void) +{ + driver_unregister(&nvdimm_driver.drv); +} + +MODULE_ALIAS_ND_DEVICE(ND_DEVICE_DIMM); diff --git a/drivers/nvdimm/dimm_devs.c b/drivers/nvdimm/dimm_devs.c index c3dd7227d1bb..b3ae86f2e1da 100644 --- a/drivers/nvdimm/dimm_devs.c +++ b/drivers/nvdimm/dimm_devs.c @@ -11,6 +11,7 @@ * General Public License for more details. 
*/ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include #include #include #include @@ -18,9 +19,115 @@ #include #include #include "nd-core.h" +#include "nd.h" static DEFINE_IDA(dimm_ida); +/* + * Retrieve bus and dimm handle and return if this bus supports + * get_config_data commands + */ +static int __validate_dimm(struct nvdimm_drvdata *ndd) +{ + struct nvdimm *nvdimm; + + if (!ndd) + return -EINVAL; + + nvdimm = to_nvdimm(ndd->dev); + + if (!nvdimm->dsm_mask) + return -ENXIO; + if (!test_bit(ND_CMD_GET_CONFIG_DATA, nvdimm->dsm_mask)) + return -ENXIO; + + return 0; +} + +static int validate_dimm(struct nvdimm_drvdata *ndd) +{ + int rc = __validate_dimm(ndd); + + if (rc && ndd) + dev_dbg(ndd->dev, "%pf: %s error: %d\n", + __builtin_return_address(0), __func__, rc); + return rc; +} + +/** + * nvdimm_init_nsarea - determine the geometry of a dimm's namespace area + * @nvdimm: dimm to initialize + */ +int nvdimm_init_nsarea(struct nvdimm_drvdata *ndd) +{ + struct nd_cmd_get_config_size *cmd = &ndd->nsarea; + struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(ndd->dev); + struct nvdimm_bus_descriptor *nd_desc; + int rc = validate_dimm(ndd); + + if (rc) + return rc; + + if (cmd->config_size) + return 0; /* already valid */ + + memset(cmd, 0, sizeof(*cmd)); + nd_desc = nvdimm_bus->nd_desc; + return nd_desc->ndctl(nd_desc, to_nvdimm(ndd->dev), + ND_CMD_GET_CONFIG_SIZE, cmd, sizeof(*cmd)); +} + +int nvdimm_init_config_data(struct nvdimm_drvdata *ndd) +{ + struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(ndd->dev); + struct nd_cmd_get_config_data_hdr *cmd; + struct nvdimm_bus_descriptor *nd_desc; + int rc = validate_dimm(ndd); + u32 max_cmd_size, config_size; + size_t offset; + + if (rc) + return rc; + + if (ndd->data) + return 0; + + if (ndd->nsarea.status || ndd->nsarea.max_xfer == 0) + return -ENXIO; + + ndd->data = kmalloc(ndd->nsarea.config_size, GFP_KERNEL); + if (!ndd->data) + ndd->data = vmalloc(ndd->nsarea.config_size); + + if (!ndd->data) + return -ENOMEM; + + max_cmd_size = min_t(u32, PAGE_SIZE, ndd->nsarea.max_xfer); + cmd = kzalloc(max_cmd_size + sizeof(*cmd), GFP_KERNEL); + if (!cmd) + return -ENOMEM; + + nd_desc = nvdimm_bus->nd_desc; + for (config_size = ndd->nsarea.config_size, offset = 0; + config_size; config_size -= cmd->in_length, + offset += cmd->in_length) { + cmd->in_length = min(config_size, max_cmd_size); + cmd->in_offset = offset; + rc = nd_desc->ndctl(nd_desc, to_nvdimm(ndd->dev), + ND_CMD_GET_CONFIG_DATA, cmd, + cmd->in_length + sizeof(*cmd)); + if (rc || cmd->status) { + rc = -ENXIO; + break; + } + memcpy(ndd->data + offset, cmd->out_buf, cmd->in_length); + } + dev_dbg(ndd->dev, "%s: len: %zu rc: %d\n", __func__, offset, rc); + kfree(cmd); + + return rc; +} + static void nvdimm_release(struct device *dev) { struct nvdimm *nvdimm = to_nvdimm(dev); @@ -111,14 +218,33 @@ struct nvdimm *nvdimm_create(struct nvdimm_bus *nvdimm_bus, void *provider_data, dev_set_name(dev, "nmem%d", nvdimm->id); dev->parent = &nvdimm_bus->dev; dev->type = &nvdimm_device_type; - dev->bus = &nvdimm_bus_type; dev->devt = MKDEV(nvdimm_major, nvdimm->id); dev->groups = groups; - if (device_register(dev) != 0) { - put_device(dev); - return NULL; - } + nd_device_register(dev); return nvdimm; } EXPORT_SYMBOL_GPL(nvdimm_create); + +static int count_dimms(struct device *dev, void *c) +{ + int *count = c; + + if (is_nvdimm(dev)) + (*count)++; + return 0; +} + +int nvdimm_bus_check_dimm_count(struct nvdimm_bus *nvdimm_bus, int dimm_count) +{ + int count = 0; + /* Flush any possible dimm registration 
failures */ + nd_synchronize(); + + device_for_each_child(&nvdimm_bus->dev, &count, count_dimms); + dev_dbg(&nvdimm_bus->dev, "%s: count: %d\n", __func__, count); + if (count != dimm_count) + return -ENXIO; + return 0; +} +EXPORT_SYMBOL_GPL(nvdimm_bus_check_dimm_count); diff --git a/drivers/nvdimm/nd-core.h b/drivers/nvdimm/nd-core.h index 59528b3c9de8..f2004b790874 100644 --- a/drivers/nvdimm/nd-core.h +++ b/drivers/nvdimm/nd-core.h @@ -17,7 +17,6 @@ extern struct list_head nvdimm_bus_list; extern struct mutex nvdimm_bus_list_mutex; -extern struct bus_type nvdimm_bus_type; extern int nvdimm_major; struct nvdimm_bus { @@ -35,10 +34,11 @@ struct nvdimm { int id; }; -bool is_nvdimm(struct device *dev); struct nvdimm_bus *walk_to_nvdimm_bus(struct device *nd_dev); int __init nvdimm_bus_init(void); -void __exit nvdimm_bus_exit(void); +void nvdimm_bus_exit(void); int nvdimm_bus_create_ndctl(struct nvdimm_bus *nvdimm_bus); void nvdimm_bus_destroy_ndctl(struct nvdimm_bus *nvdimm_bus); +void nd_synchronize(void); +bool is_nvdimm(struct device *dev); #endif /* __ND_CORE_H__ */ diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h new file mode 100644 index 000000000000..1f7f6ecab0fc --- /dev/null +++ b/drivers/nvdimm/nd.h @@ -0,0 +1,36 @@ +/* + * Copyright(c) 2013-2015 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +#ifndef __ND_H__ +#define __ND_H__ +#include +#include +#include + +struct nvdimm_drvdata { + struct device *dev; + struct nd_cmd_get_config_size nsarea; + void *data; +}; + +enum nd_async_mode { + ND_SYNC, + ND_ASYNC, +}; + +void nd_device_register(struct device *dev); +void nd_device_unregister(struct device *dev, enum nd_async_mode mode); +int __init nvdimm_init(void); +void nvdimm_exit(void); +int nvdimm_init_nsarea(struct nvdimm_drvdata *ndd); +int nvdimm_init_config_data(struct nvdimm_drvdata *ndd); +#endif /* __ND_H__ */ diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h index a39235819af3..d3ebccf4ea8b 100644 --- a/include/linux/libnvdimm.h +++ b/include/linux/libnvdimm.h @@ -30,6 +30,7 @@ enum { extern struct attribute_group nvdimm_bus_attribute_group; extern struct attribute_group nvdimm_attribute_group; +extern struct attribute_group nd_device_attribute_group; struct nvdimm; struct nvdimm_bus_descriptor; @@ -71,4 +72,5 @@ u32 nd_cmd_in_size(struct nvdimm *nvdimm, int cmd, u32 nd_cmd_out_size(struct nvdimm *nvdimm, int cmd, const struct nd_cmd_desc *desc, int idx, const u32 *in_field, const u32 *out_field); +int nvdimm_bus_check_dimm_count(struct nvdimm_bus *nvdimm_bus, int dimm_count); #endif /* __LIBNVDIMM_H__ */ diff --git a/include/linux/nd.h b/include/linux/nd.h new file mode 100644 index 000000000000..e074f67e53a3 --- /dev/null +++ b/include/linux/nd.h @@ -0,0 +1,39 @@ +/* + * Copyright(c) 2013-2015 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +#ifndef __LINUX_ND_H__ +#define __LINUX_ND_H__ +#include +#include + +struct nd_device_driver { + struct device_driver drv; + unsigned long type; + int (*probe)(struct device *dev); + int (*remove)(struct device *dev); +}; + +static inline struct nd_device_driver *to_nd_device_driver( + struct device_driver *drv) +{ + return container_of(drv, struct nd_device_driver, drv); +} + +#define MODULE_ALIAS_ND_DEVICE(type) \ + MODULE_ALIAS("nd:t" __stringify(type) "*") +#define ND_DEVICE_MODALIAS_FMT "nd:t%d" + +int __must_check __nd_driver_register(struct nd_device_driver *nd_drv, + struct module *module, const char *mod_name); +#define nd_driver_register(driver) \ + __nd_driver_register(driver, THIS_MODULE, KBUILD_MODNAME) +#endif /* __LINUX_ND_H__ */ diff --git a/include/uapi/linux/ndctl.h b/include/uapi/linux/ndctl.h index ff13c23b26df..37640916d146 100644 --- a/include/uapi/linux/ndctl.h +++ b/include/uapi/linux/ndctl.h @@ -175,4 +175,10 @@ static inline const char *nvdimm_cmd_name(unsigned cmd) #define ND_IOCTL_ARS_STATUS _IOWR(ND_IOCTL, ND_CMD_ARS_STATUS,\ struct nd_cmd_ars_status) + +#define ND_DEVICE_DIMM 1 /* nd_dimm: container for "config data" */ + +enum nd_driver_flags { + ND_DRIVER_DIMM = 1 << ND_DEVICE_DIMM, +}; #endif /* __NDCTL_H__ */ -- cgit v1.2.3 From 3d88002e4a7bd40f355550284c6cd140e6fe29dc Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Sun, 31 May 2015 15:02:11 -0400 Subject: libnvdimm: support for legacy (non-aliasing) nvdimms The libnvdimm region driver is an intermediary driver that translates non-volatile "region"s into "namespace" sub-devices that are surfaced by persistent memory block-device drivers (PMEM and BLK). ACPI 6 introduces the concept that a given nvdimm may simultaneously offer multiple access modes to its media through direct PMEM load/store access, or windowed BLK mode. Existing nvdimms mostly implement a PMEM interface, some offer a BLK-like mode, but never both as ACPI 6 defines. If an nvdimm is single interfaced, then there is no need for dimm metadata labels. For these devices we can take the region boundaries directly to create a child namespace device (nd_namespace_io). 
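For illustration only (not part of this series): a minimal consumer driver for
these legacy namespaces could bind to the new nd_namespace_io device type using
the bus infrastructure added earlier in the series. The sketch below relies only
on interfaces introduced here (nd_driver_register(), to_nd_namespace_io(),
ND_DRIVER_NAMESPACE_IO, MODULE_ALIAS_ND_DEVICE()); the "example_nsio" driver and
its function names are hypothetical placeholders, not an implementation from
this patch set.

/*
 * Hypothetical sketch: bind to legacy nd_namespace_io devices surfaced by
 * the region driver and report the physical address range they describe.
 */
#include <linux/module.h>
#include <linux/device.h>
#include <linux/nd.h>

static int example_nsio_probe(struct device *dev)
{
	struct nd_namespace_io *nsio = to_nd_namespace_io(dev);

	/* nsio->res carries the region boundaries taken directly from the bus */
	dev_info(dev, "legacy namespace spans %pR\n", &nsio->res);
	return 0;
}

static int example_nsio_remove(struct device *dev)
{
	return 0;
}

static struct nd_device_driver example_nsio_driver = {
	.probe = example_nsio_probe,
	.remove = example_nsio_remove,
	.drv = {
		.name = "example_nsio",
	},
	.type = ND_DRIVER_NAMESPACE_IO,
};

static int __init example_nsio_init(void)
{
	/* expands to __nd_driver_register(..., THIS_MODULE, KBUILD_MODNAME) */
	return nd_driver_register(&example_nsio_driver);
}
module_init(example_nsio_init);

static void __exit example_nsio_exit(void)
{
	driver_unregister(&example_nsio_driver.drv);
}
module_exit(example_nsio_exit);

MODULE_ALIAS_ND_DEVICE(ND_DEVICE_NAMESPACE_IO);
MODULE_LICENSE("GPL v2");

Because nd_device_register() emits MODALIAS=nd:t4 for an nd_namespace_io device,
udev would autoload such a module via the MODULE_ALIAS_ND_DEVICE() alias above.
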
Acked-by: Christoph Hellwig Tested-by: Toshi Kani Signed-off-by: Dan Williams --- drivers/acpi/nfit.c | 1 + drivers/nvdimm/Makefile | 2 + drivers/nvdimm/bus.c | 26 ++++++++++ drivers/nvdimm/core.c | 44 ++++++++++++++-- drivers/nvdimm/dimm.c | 2 +- drivers/nvdimm/namespace_devs.c | 111 ++++++++++++++++++++++++++++++++++++++++ drivers/nvdimm/nd-core.h | 6 ++- drivers/nvdimm/nd.h | 13 +++++ drivers/nvdimm/region.c | 93 +++++++++++++++++++++++++++++++++ drivers/nvdimm/region_devs.c | 66 +++++++++++++++++++++++- include/linux/libnvdimm.h | 7 ++- include/linux/nd.h | 10 ++++ include/uapi/linux/ndctl.h | 10 ++++ 13 files changed, 383 insertions(+), 8 deletions(-) create mode 100644 drivers/nvdimm/namespace_devs.c create mode 100644 drivers/nvdimm/region.c (limited to 'include/uapi/linux') diff --git a/drivers/acpi/nfit.c b/drivers/acpi/nfit.c index 068f69d70c9e..ce290748fe36 100644 --- a/drivers/acpi/nfit.c +++ b/drivers/acpi/nfit.c @@ -780,6 +780,7 @@ static struct attribute_group acpi_nfit_region_attribute_group = { static const struct attribute_group *acpi_nfit_region_attribute_groups[] = { &nd_region_attribute_group, &nd_mapping_attribute_group, + &nd_device_attribute_group, &acpi_nfit_region_attribute_group, NULL, }; diff --git a/drivers/nvdimm/Makefile b/drivers/nvdimm/Makefile index 88afd0d849c3..af5e2760ddbd 100644 --- a/drivers/nvdimm/Makefile +++ b/drivers/nvdimm/Makefile @@ -5,3 +5,5 @@ libnvdimm-y += bus.o libnvdimm-y += dimm_devs.o libnvdimm-y += dimm.o libnvdimm-y += region_devs.o +libnvdimm-y += region.o +libnvdimm-y += namespace_devs.o diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c index a0308f1872bf..4b77665a6cc8 100644 --- a/drivers/nvdimm/bus.c +++ b/drivers/nvdimm/bus.c @@ -13,6 +13,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include #include +#include #include #include #include @@ -33,6 +34,12 @@ static int to_nd_device_type(struct device *dev) { if (is_nvdimm(dev)) return ND_DEVICE_DIMM; + else if (is_nd_pmem(dev)) + return ND_DEVICE_REGION_PMEM; + else if (is_nd_blk(dev)) + return ND_DEVICE_REGION_BLK; + else if (is_nd_pmem(dev->parent) || is_nd_blk(dev->parent)) + return nd_region_to_nstype(to_nd_region(dev->parent)); return 0; } @@ -50,27 +57,46 @@ static int nvdimm_bus_match(struct device *dev, struct device_driver *drv) return test_bit(to_nd_device_type(dev), &nd_drv->type); } +static struct module *to_bus_provider(struct device *dev) +{ + /* pin bus providers while regions are enabled */ + if (is_nd_pmem(dev) || is_nd_blk(dev)) { + struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev); + + return nvdimm_bus->module; + } + return NULL; +} + static int nvdimm_bus_probe(struct device *dev) { struct nd_device_driver *nd_drv = to_nd_device_driver(dev->driver); + struct module *provider = to_bus_provider(dev); struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev); int rc; + if (!try_module_get(provider)) + return -ENXIO; + rc = nd_drv->probe(dev); dev_dbg(&nvdimm_bus->dev, "%s.probe(%s) = %d\n", dev->driver->name, dev_name(dev), rc); + if (rc != 0) + module_put(provider); return rc; } static int nvdimm_bus_remove(struct device *dev) { struct nd_device_driver *nd_drv = to_nd_device_driver(dev->driver); + struct module *provider = to_bus_provider(dev); struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev); int rc; rc = nd_drv->remove(dev); dev_dbg(&nvdimm_bus->dev, "%s.remove(%s) = %d\n", dev->driver->name, dev_name(dev), rc); + module_put(provider); return rc; } diff --git a/drivers/nvdimm/core.c b/drivers/nvdimm/core.c index 50ab880f0dc0..1b6b15d11f54 
100644 --- a/drivers/nvdimm/core.c +++ b/drivers/nvdimm/core.c @@ -24,6 +24,36 @@ LIST_HEAD(nvdimm_bus_list); DEFINE_MUTEX(nvdimm_bus_list_mutex); static DEFINE_IDA(nd_ida); +void nvdimm_bus_lock(struct device *dev) +{ + struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev); + + if (!nvdimm_bus) + return; + mutex_lock(&nvdimm_bus->reconfig_mutex); +} +EXPORT_SYMBOL(nvdimm_bus_lock); + +void nvdimm_bus_unlock(struct device *dev) +{ + struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev); + + if (!nvdimm_bus) + return; + mutex_unlock(&nvdimm_bus->reconfig_mutex); +} +EXPORT_SYMBOL(nvdimm_bus_unlock); + +bool is_nvdimm_bus_locked(struct device *dev) +{ + struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev); + + if (!nvdimm_bus) + return false; + return mutex_is_locked(&nvdimm_bus->reconfig_mutex); +} +EXPORT_SYMBOL(is_nvdimm_bus_locked); + static void nvdimm_bus_release(struct device *dev) { struct nvdimm_bus *nvdimm_bus; @@ -135,8 +165,8 @@ struct attribute_group nvdimm_bus_attribute_group = { }; EXPORT_SYMBOL_GPL(nvdimm_bus_attribute_group); -struct nvdimm_bus *nvdimm_bus_register(struct device *parent, - struct nvdimm_bus_descriptor *nd_desc) +struct nvdimm_bus *__nvdimm_bus_register(struct device *parent, + struct nvdimm_bus_descriptor *nd_desc, struct module *module) { struct nvdimm_bus *nvdimm_bus; int rc; @@ -146,11 +176,13 @@ struct nvdimm_bus *nvdimm_bus_register(struct device *parent, return NULL; INIT_LIST_HEAD(&nvdimm_bus->list); nvdimm_bus->id = ida_simple_get(&nd_ida, 0, 0, GFP_KERNEL); + mutex_init(&nvdimm_bus->reconfig_mutex); if (nvdimm_bus->id < 0) { kfree(nvdimm_bus); return NULL; } nvdimm_bus->nd_desc = nd_desc; + nvdimm_bus->module = module; nvdimm_bus->dev.parent = parent; nvdimm_bus->dev.release = nvdimm_bus_release; nvdimm_bus->dev.groups = nd_desc->attr_groups; @@ -174,7 +206,7 @@ struct nvdimm_bus *nvdimm_bus_register(struct device *parent, put_device(&nvdimm_bus->dev); return NULL; } -EXPORT_SYMBOL_GPL(nvdimm_bus_register); +EXPORT_SYMBOL_GPL(__nvdimm_bus_register); static int child_unregister(struct device *dev, void *data) { @@ -218,7 +250,12 @@ static __init int libnvdimm_init(void) rc = nvdimm_init(); if (rc) goto err_dimm; + rc = nd_region_init(); + if (rc) + goto err_region; return 0; + err_region: + nvdimm_exit(); err_dimm: nvdimm_bus_exit(); return rc; @@ -227,6 +264,7 @@ static __init int libnvdimm_init(void) static __exit void libnvdimm_exit(void) { WARN_ON(!list_empty(&nvdimm_bus_list)); + nd_region_exit(); nvdimm_exit(); nvdimm_bus_exit(); } diff --git a/drivers/nvdimm/dimm.c b/drivers/nvdimm/dimm.c index 28001a6ccd4e..eb20fc2df32b 100644 --- a/drivers/nvdimm/dimm.c +++ b/drivers/nvdimm/dimm.c @@ -84,7 +84,7 @@ int __init nvdimm_init(void) return nd_driver_register(&nvdimm_driver); } -void __exit nvdimm_exit(void) +void nvdimm_exit(void) { driver_unregister(&nvdimm_driver.drv); } diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c new file mode 100644 index 000000000000..4f653d1e61ad --- /dev/null +++ b/drivers/nvdimm/namespace_devs.c @@ -0,0 +1,111 @@ +/* + * Copyright(c) 2013-2015 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + * General Public License for more details. + */ +#include +#include +#include +#include +#include "nd.h" + +static void namespace_io_release(struct device *dev) +{ + struct nd_namespace_io *nsio = to_nd_namespace_io(dev); + + kfree(nsio); +} + +static struct device_type namespace_io_device_type = { + .name = "nd_namespace_io", + .release = namespace_io_release, +}; + +static ssize_t nstype_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nd_region *nd_region = to_nd_region(dev->parent); + + return sprintf(buf, "%d\n", nd_region_to_nstype(nd_region)); +} +static DEVICE_ATTR_RO(nstype); + +static struct attribute *nd_namespace_attributes[] = { + &dev_attr_nstype.attr, + NULL, +}; + +static struct attribute_group nd_namespace_attribute_group = { + .attrs = nd_namespace_attributes, +}; + +static const struct attribute_group *nd_namespace_attribute_groups[] = { + &nd_device_attribute_group, + &nd_namespace_attribute_group, + NULL, +}; + +static struct device **create_namespace_io(struct nd_region *nd_region) +{ + struct nd_namespace_io *nsio; + struct device *dev, **devs; + struct resource *res; + + nsio = kzalloc(sizeof(*nsio), GFP_KERNEL); + if (!nsio) + return NULL; + + devs = kcalloc(2, sizeof(struct device *), GFP_KERNEL); + if (!devs) { + kfree(nsio); + return NULL; + } + + dev = &nsio->dev; + dev->type = &namespace_io_device_type; + dev->parent = &nd_region->dev; + res = &nsio->res; + res->name = dev_name(&nd_region->dev); + res->flags = IORESOURCE_MEM; + res->start = nd_region->ndr_start; + res->end = res->start + nd_region->ndr_size - 1; + + devs[0] = dev; + return devs; +} + +int nd_region_register_namespaces(struct nd_region *nd_region, int *err) +{ + struct device **devs = NULL; + int i; + + *err = 0; + switch (nd_region_to_nstype(nd_region)) { + case ND_DEVICE_NAMESPACE_IO: + devs = create_namespace_io(nd_region); + break; + default: + break; + } + + if (!devs) + return -ENODEV; + + for (i = 0; devs[i]; i++) { + struct device *dev = devs[i]; + + dev_set_name(dev, "namespace%d.%d", nd_region->id, i); + dev->groups = nd_namespace_attribute_groups; + nd_device_register(dev); + } + kfree(devs); + + return i; +} diff --git a/drivers/nvdimm/nd-core.h b/drivers/nvdimm/nd-core.h index 1d760bf24857..0e9b41fd2546 100644 --- a/drivers/nvdimm/nd-core.h +++ b/drivers/nvdimm/nd-core.h @@ -21,9 +21,11 @@ extern int nvdimm_major; struct nvdimm_bus { struct nvdimm_bus_descriptor *nd_desc; + struct module *module; struct list_head list; struct device dev; int id; + struct mutex reconfig_mutex; }; struct nvdimm { @@ -34,6 +36,9 @@ struct nvdimm { int id; }; +bool is_nvdimm(struct device *dev); +bool is_nd_blk(struct device *dev); +bool is_nd_pmem(struct device *dev); struct nvdimm_bus *walk_to_nvdimm_bus(struct device *nd_dev); int __init nvdimm_bus_init(void); void nvdimm_bus_exit(void); @@ -43,5 +48,4 @@ void nd_synchronize(void); int nvdimm_bus_register_dimms(struct nvdimm_bus *nvdimm_bus); int nvdimm_bus_register_regions(struct nvdimm_bus *nvdimm_bus); int nd_match_dimm(struct device *dev, void *data); -bool is_nvdimm(struct device *dev); #endif /* __ND_CORE_H__ */ diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h index ea0cca337aa6..bc5a08e36a25 100644 --- a/drivers/nvdimm/nd.h +++ b/drivers/nvdimm/nd.h @@ -23,6 +23,11 @@ struct nvdimm_drvdata { void *data; }; +struct nd_region_namespaces { + int count; + int active; +}; + struct nd_region { struct device dev; u16 ndr_mappings; @@ -41,7 +46,15 @@ enum nd_async_mode { void 
nd_device_register(struct device *dev); void nd_device_unregister(struct device *dev, enum nd_async_mode mode); int __init nvdimm_init(void); +int __init nd_region_init(void); void nvdimm_exit(void); +void nd_region_exit(void); int nvdimm_init_nsarea(struct nvdimm_drvdata *ndd); int nvdimm_init_config_data(struct nvdimm_drvdata *ndd); +struct nd_region *to_nd_region(struct device *dev); +int nd_region_to_nstype(struct nd_region *nd_region); +int nd_region_register_namespaces(struct nd_region *nd_region, int *err); +void nvdimm_bus_lock(struct device *dev); +void nvdimm_bus_unlock(struct device *dev); +bool is_nvdimm_bus_locked(struct device *dev); #endif /* __ND_H__ */ diff --git a/drivers/nvdimm/region.c b/drivers/nvdimm/region.c new file mode 100644 index 000000000000..ade3dba81afd --- /dev/null +++ b/drivers/nvdimm/region.c @@ -0,0 +1,93 @@ +/* + * Copyright(c) 2013-2015 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +#include +#include +#include +#include "nd.h" + +static int nd_region_probe(struct device *dev) +{ + int err; + struct nd_region_namespaces *num_ns; + struct nd_region *nd_region = to_nd_region(dev); + int rc = nd_region_register_namespaces(nd_region, &err); + + num_ns = devm_kzalloc(dev, sizeof(*num_ns), GFP_KERNEL); + if (!num_ns) + return -ENOMEM; + + if (rc < 0) + return rc; + + num_ns->active = rc; + num_ns->count = rc + err; + dev_set_drvdata(dev, num_ns); + + if (err == 0) + return 0; + + if (rc == err) + return -ENODEV; + + /* + * Given multiple namespaces per region, we do not want to + * disable all the successfully registered peer namespaces upon + * a single registration failure. If userspace is missing a + * namespace that it expects it can disable/re-enable the region + * to retry discovery after correcting the failure. + * /namespaces returns the current + * "/" namespace count. + */ + dev_err(dev, "failed to register %d namespace%s, continuing...\n", + err, err == 1 ? 
"" : "s"); + return 0; +} + +static int child_unregister(struct device *dev, void *data) +{ + nd_device_unregister(dev, ND_SYNC); + return 0; +} + +static int nd_region_remove(struct device *dev) +{ + /* flush attribute readers and disable */ + nvdimm_bus_lock(dev); + dev_set_drvdata(dev, NULL); + nvdimm_bus_unlock(dev); + + device_for_each_child(dev, NULL, child_unregister); + return 0; +} + +static struct nd_device_driver nd_region_driver = { + .probe = nd_region_probe, + .remove = nd_region_remove, + .drv = { + .name = "nd_region", + }, + .type = ND_DRIVER_REGION_BLK | ND_DRIVER_REGION_PMEM, +}; + +int __init nd_region_init(void) +{ + return nd_driver_register(&nd_region_driver); +} + +void nd_region_exit(void) +{ + driver_unregister(&nd_region_driver.drv); +} + +MODULE_ALIAS_ND_DEVICE(ND_DEVICE_REGION_PMEM); +MODULE_ALIAS_ND_DEVICE(ND_DEVICE_REGION_BLK); diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c index 4bda2e0df8f7..b5c5b9095b28 100644 --- a/drivers/nvdimm/region_devs.c +++ b/drivers/nvdimm/region_devs.c @@ -47,11 +47,16 @@ static struct device_type nd_volatile_device_type = { .release = nd_region_release, }; -static bool is_nd_pmem(struct device *dev) +bool is_nd_pmem(struct device *dev) { return dev ? dev->type == &nd_pmem_device_type : false; } +bool is_nd_blk(struct device *dev) +{ + return dev ? dev->type == &nd_blk_device_type : false; +} + struct nd_region *to_nd_region(struct device *dev) { struct nd_region *nd_region = container_of(dev, struct nd_region, dev); @@ -61,6 +66,37 @@ struct nd_region *to_nd_region(struct device *dev) } EXPORT_SYMBOL_GPL(to_nd_region); +/** + * nd_region_to_nstype() - region to an integer namespace type + * @nd_region: region-device to interrogate + * + * This is the 'nstype' attribute of a region as well, an input to the + * MODALIAS for namespace devices, and bit number for a nvdimm_bus to match + * namespace devices with namespace drivers. 
+ */ +int nd_region_to_nstype(struct nd_region *nd_region) +{ + if (is_nd_pmem(&nd_region->dev)) { + u16 i, alias; + + for (i = 0, alias = 0; i < nd_region->ndr_mappings; i++) { + struct nd_mapping *nd_mapping = &nd_region->mapping[i]; + struct nvdimm *nvdimm = nd_mapping->nvdimm; + + if (nvdimm->flags & NDD_ALIASING) + alias++; + } + if (alias) + return ND_DEVICE_NAMESPACE_PMEM; + else + return ND_DEVICE_NAMESPACE_IO; + } else if (is_nd_blk(&nd_region->dev)) { + return ND_DEVICE_NAMESPACE_BLK; + } + + return 0; +} + static ssize_t size_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -88,9 +124,37 @@ static ssize_t mappings_show(struct device *dev, } static DEVICE_ATTR_RO(mappings); +static ssize_t nstype_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nd_region *nd_region = to_nd_region(dev); + + return sprintf(buf, "%d\n", nd_region_to_nstype(nd_region)); +} +static DEVICE_ATTR_RO(nstype); + +static ssize_t init_namespaces_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nd_region_namespaces *num_ns = dev_get_drvdata(dev); + ssize_t rc; + + nvdimm_bus_lock(dev); + if (num_ns) + rc = sprintf(buf, "%d/%d\n", num_ns->active, num_ns->count); + else + rc = -ENXIO; + nvdimm_bus_unlock(dev); + + return rc; +} +static DEVICE_ATTR_RO(init_namespaces); + static struct attribute *nd_region_attributes[] = { &dev_attr_size.attr, + &dev_attr_nstype.attr, &dev_attr_mappings.attr, + &dev_attr_init_namespaces.attr, NULL, }; diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h index 39e7e606092a..37f966aff386 100644 --- a/include/linux/libnvdimm.h +++ b/include/linux/libnvdimm.h @@ -71,8 +71,11 @@ struct nd_region_desc { struct nvdimm_bus; struct device; -struct nvdimm_bus *nvdimm_bus_register(struct device *parent, - struct nvdimm_bus_descriptor *nfit_desc); +struct module; +struct nvdimm_bus *__nvdimm_bus_register(struct device *parent, + struct nvdimm_bus_descriptor *nfit_desc, struct module *module); +#define nvdimm_bus_register(parent, desc) \ + __nvdimm_bus_register(parent, desc, THIS_MODULE) void nvdimm_bus_unregister(struct nvdimm_bus *nvdimm_bus); struct nvdimm_bus *to_nvdimm_bus(struct device *dev); struct nvdimm *to_nvdimm(struct device *dev); diff --git a/include/linux/nd.h b/include/linux/nd.h index e074f67e53a3..da70e9962197 100644 --- a/include/linux/nd.h +++ b/include/linux/nd.h @@ -26,6 +26,16 @@ static inline struct nd_device_driver *to_nd_device_driver( struct device_driver *drv) { return container_of(drv, struct nd_device_driver, drv); +}; + +struct nd_namespace_io { + struct device dev; + struct resource res; +}; + +static inline struct nd_namespace_io *to_nd_namespace_io(struct device *dev) +{ + return container_of(dev, struct nd_namespace_io, dev); } #define MODULE_ALIAS_ND_DEVICE(type) \ diff --git a/include/uapi/linux/ndctl.h b/include/uapi/linux/ndctl.h index 37640916d146..174b6371dcc1 100644 --- a/include/uapi/linux/ndctl.h +++ b/include/uapi/linux/ndctl.h @@ -177,8 +177,18 @@ static inline const char *nvdimm_cmd_name(unsigned cmd) #define ND_DEVICE_DIMM 1 /* nd_dimm: container for "config data" */ +#define ND_DEVICE_REGION_PMEM 2 /* nd_region: (parent of PMEM namespaces) */ +#define ND_DEVICE_REGION_BLK 3 /* nd_region: (parent of BLK namespaces) */ +#define ND_DEVICE_NAMESPACE_IO 4 /* legacy persistent memory */ +#define ND_DEVICE_NAMESPACE_PMEM 5 /* PMEM namespace (may alias with BLK) */ +#define ND_DEVICE_NAMESPACE_BLK 6 /* BLK namespace (may alias with PMEM) */ enum 
nd_driver_flags { ND_DRIVER_DIMM = 1 << ND_DEVICE_DIMM, + ND_DRIVER_REGION_PMEM = 1 << ND_DEVICE_REGION_PMEM, + ND_DRIVER_REGION_BLK = 1 << ND_DEVICE_REGION_BLK, + ND_DRIVER_NAMESPACE_IO = 1 << ND_DEVICE_NAMESPACE_IO, + ND_DRIVER_NAMESPACE_PMEM = 1 << ND_DEVICE_NAMESPACE_PMEM, + ND_DRIVER_NAMESPACE_BLK = 1 << ND_DEVICE_NAMESPACE_BLK, }; #endif /* __NDCTL_H__ */ -- cgit v1.2.3 From 4a826c83db4edc040da3a66dbefd53f0cfcf457d Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 9 Jun 2015 16:09:36 -0400 Subject: libnvdimm: namespace indices: read and validate This on media label format [1] consists of two index blocks followed by an array of labels. None of these structures are ever updated in place. A sequence number tracks the current active index and the next one to write, while labels are written to free slots. +------------+ | | | nsindex0 | | | +------------+ | | | nsindex1 | | | +------------+ | label0 | +------------+ | label1 | +------------+ | | ....nslot... | | +------------+ | labelN | +------------+ After reading valid labels, store the dpa ranges they claim into per-dimm resource trees. [1]: http://pmem.io/documents/NVDIMM_Namespace_Spec.pdf Cc: Neil Brown Acked-by: Christoph Hellwig Signed-off-by: Dan Williams --- drivers/nvdimm/Makefile | 1 + drivers/nvdimm/dimm.c | 23 ++++ drivers/nvdimm/dimm_devs.c | 30 ++++- drivers/nvdimm/label.c | 290 +++++++++++++++++++++++++++++++++++++++++++++ drivers/nvdimm/label.h | 128 ++++++++++++++++++++ drivers/nvdimm/nd.h | 49 ++++++++ include/uapi/linux/ndctl.h | 1 - 7 files changed, 520 insertions(+), 2 deletions(-) create mode 100644 drivers/nvdimm/label.c create mode 100644 drivers/nvdimm/label.h (limited to 'include/uapi/linux') diff --git a/drivers/nvdimm/Makefile b/drivers/nvdimm/Makefile index 4d2a27f52faa..abce98f87f16 100644 --- a/drivers/nvdimm/Makefile +++ b/drivers/nvdimm/Makefile @@ -10,3 +10,4 @@ libnvdimm-y += dimm.o libnvdimm-y += region_devs.o libnvdimm-y += region.o libnvdimm-y += namespace_devs.o +libnvdimm-y += label.o diff --git a/drivers/nvdimm/dimm.c b/drivers/nvdimm/dimm.c index eb20fc2df32b..2df97c3c3b34 100644 --- a/drivers/nvdimm/dimm.c +++ b/drivers/nvdimm/dimm.c @@ -18,6 +18,7 @@ #include #include #include +#include "label.h" #include "nd.h" static void free_data(struct nvdimm_drvdata *ndd) @@ -42,6 +43,11 @@ static int nvdimm_probe(struct device *dev) return -ENOMEM; dev_set_drvdata(dev, ndd); + ndd->dpa.name = dev_name(dev); + ndd->ns_current = -1; + ndd->ns_next = -1; + ndd->dpa.start = 0; + ndd->dpa.end = -1; ndd->dev = dev; rc = nvdimm_init_nsarea(ndd); @@ -54,6 +60,17 @@ static int nvdimm_probe(struct device *dev) dev_dbg(dev, "config data size: %d\n", ndd->nsarea.config_size); + nvdimm_bus_lock(dev); + ndd->ns_current = nd_label_validate(ndd); + ndd->ns_next = nd_label_next_nsindex(ndd->ns_current); + nd_label_copy(ndd, to_next_namespace_index(ndd), + to_current_namespace_index(ndd)); + rc = nd_label_reserve_dpa(ndd); + nvdimm_bus_unlock(dev); + + if (rc) + goto err; + return 0; err: @@ -64,7 +81,13 @@ static int nvdimm_probe(struct device *dev) static int nvdimm_remove(struct device *dev) { struct nvdimm_drvdata *ndd = dev_get_drvdata(dev); + struct resource *res, *_r; + nvdimm_bus_lock(dev); + dev_set_drvdata(dev, NULL); + for_each_dpa_resource_safe(ndd, res, _r) + nvdimm_free_dpa(ndd, res); + nvdimm_bus_unlock(dev); free_data(ndd); return 0; diff --git a/drivers/nvdimm/dimm_devs.c b/drivers/nvdimm/dimm_devs.c index bdf8241b6525..d2ef02e4be6c 100644 --- a/drivers/nvdimm/dimm_devs.c +++ 
b/drivers/nvdimm/dimm_devs.c @@ -92,8 +92,12 @@ int nvdimm_init_config_data(struct nvdimm_drvdata *ndd) if (ndd->data) return 0; - if (ndd->nsarea.status || ndd->nsarea.max_xfer == 0) + if (ndd->nsarea.status || ndd->nsarea.max_xfer == 0 + || ndd->nsarea.config_size < ND_LABEL_MIN_SIZE) { + dev_dbg(ndd->dev, "failed to init config data area: (%d:%d)\n", + ndd->nsarea.max_xfer, ndd->nsarea.config_size); return -ENXIO; + } ndd->data = kmalloc(ndd->nsarea.config_size, GFP_KERNEL); if (!ndd->data) @@ -243,6 +247,30 @@ struct nvdimm *nvdimm_create(struct nvdimm_bus *nvdimm_bus, void *provider_data, } EXPORT_SYMBOL_GPL(nvdimm_create); +void nvdimm_free_dpa(struct nvdimm_drvdata *ndd, struct resource *res) +{ + WARN_ON_ONCE(!is_nvdimm_bus_locked(ndd->dev)); + kfree(res->name); + __release_region(&ndd->dpa, res->start, resource_size(res)); +} + +struct resource *nvdimm_allocate_dpa(struct nvdimm_drvdata *ndd, + struct nd_label_id *label_id, resource_size_t start, + resource_size_t n) +{ + char *name = kmemdup(label_id, sizeof(*label_id), GFP_KERNEL); + struct resource *res; + + if (!name) + return NULL; + + WARN_ON_ONCE(!is_nvdimm_bus_locked(ndd->dev)); + res = __request_region(&ndd->dpa, start, n, name, 0); + if (!res) + kfree(name); + return res; +} + static int count_dimms(struct device *dev, void *c) { int *count = c; diff --git a/drivers/nvdimm/label.c b/drivers/nvdimm/label.c new file mode 100644 index 000000000000..db5d7492dc8d --- /dev/null +++ b/drivers/nvdimm/label.c @@ -0,0 +1,290 @@ +/* + * Copyright(c) 2013-2015 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +#include +#include +#include +#include +#include "nd-core.h" +#include "label.h" +#include "nd.h" + +static u32 best_seq(u32 a, u32 b) +{ + a &= NSINDEX_SEQ_MASK; + b &= NSINDEX_SEQ_MASK; + + if (a == 0 || a == b) + return b; + else if (b == 0) + return a; + else if (nd_inc_seq(a) == b) + return b; + else + return a; +} + +size_t sizeof_namespace_index(struct nvdimm_drvdata *ndd) +{ + u32 index_span; + + if (ndd->nsindex_size) + return ndd->nsindex_size; + + /* + * The minimum index space is 512 bytes, with that amount of + * index we can describe ~1400 labels which is less than a byte + * of overhead per label. Round up to a byte of overhead per + * label and determine the size of the index region. Yes, this + * starts to waste space at larger config_sizes, but it's + * unlikely we'll ever see anything but 128K. + */ + index_span = ndd->nsarea.config_size / 129; + index_span /= NSINDEX_ALIGN * 2; + ndd->nsindex_size = index_span * NSINDEX_ALIGN; + + return ndd->nsindex_size; +} + +int nd_label_validate(struct nvdimm_drvdata *ndd) +{ + /* + * On media label format consists of two index blocks followed + * by an array of labels. None of these structures are ever + * updated in place. A sequence number tracks the current + * active index and the next one to write, while labels are + * written to free slots. 
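+ * Each index block must pass signature, checksum, sequence number
+ * and offset/size sanity checks before it is considered valid.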
+ * + * +------------+ + * | | + * | nsindex0 | + * | | + * +------------+ + * | | + * | nsindex1 | + * | | + * +------------+ + * | label0 | + * +------------+ + * | label1 | + * +------------+ + * | | + * ....nslot... + * | | + * +------------+ + * | labelN | + * +------------+ + */ + struct nd_namespace_index *nsindex[] = { + to_namespace_index(ndd, 0), + to_namespace_index(ndd, 1), + }; + const int num_index = ARRAY_SIZE(nsindex); + struct device *dev = ndd->dev; + bool valid[2] = { 0 }; + int i, num_valid = 0; + u32 seq; + + for (i = 0; i < num_index; i++) { + u32 nslot; + u8 sig[NSINDEX_SIG_LEN]; + u64 sum_save, sum, size; + + memcpy(sig, nsindex[i]->sig, NSINDEX_SIG_LEN); + if (memcmp(sig, NSINDEX_SIGNATURE, NSINDEX_SIG_LEN) != 0) { + dev_dbg(dev, "%s: nsindex%d signature invalid\n", + __func__, i); + continue; + } + sum_save = __le64_to_cpu(nsindex[i]->checksum); + nsindex[i]->checksum = __cpu_to_le64(0); + sum = nd_fletcher64(nsindex[i], sizeof_namespace_index(ndd), 1); + nsindex[i]->checksum = __cpu_to_le64(sum_save); + if (sum != sum_save) { + dev_dbg(dev, "%s: nsindex%d checksum invalid\n", + __func__, i); + continue; + } + + seq = __le32_to_cpu(nsindex[i]->seq); + if ((seq & NSINDEX_SEQ_MASK) == 0) { + dev_dbg(dev, "%s: nsindex%d sequence: %#x invalid\n", + __func__, i, seq); + continue; + } + + /* sanity check the index against expected values */ + if (__le64_to_cpu(nsindex[i]->myoff) + != i * sizeof_namespace_index(ndd)) { + dev_dbg(dev, "%s: nsindex%d myoff: %#llx invalid\n", + __func__, i, (unsigned long long) + __le64_to_cpu(nsindex[i]->myoff)); + continue; + } + if (__le64_to_cpu(nsindex[i]->otheroff) + != (!i) * sizeof_namespace_index(ndd)) { + dev_dbg(dev, "%s: nsindex%d otheroff: %#llx invalid\n", + __func__, i, (unsigned long long) + __le64_to_cpu(nsindex[i]->otheroff)); + continue; + } + + size = __le64_to_cpu(nsindex[i]->mysize); + if (size > sizeof_namespace_index(ndd) + || size < sizeof(struct nd_namespace_index)) { + dev_dbg(dev, "%s: nsindex%d mysize: %#llx invalid\n", + __func__, i, size); + continue; + } + + nslot = __le32_to_cpu(nsindex[i]->nslot); + if (nslot * sizeof(struct nd_namespace_label) + + 2 * sizeof_namespace_index(ndd) + > ndd->nsarea.config_size) { + dev_dbg(dev, "%s: nsindex%d nslot: %u invalid, config_size: %#x\n", + __func__, i, nslot, + ndd->nsarea.config_size); + continue; + } + valid[i] = true; + num_valid++; + } + + switch (num_valid) { + case 0: + break; + case 1: + for (i = 0; i < num_index; i++) + if (valid[i]) + return i; + /* can't have num_valid > 0 but valid[] = { false, false } */ + WARN_ON(1); + break; + default: + /* pick the best index... 
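+ * i.e. the one whose sequence number wins best_seq()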
*/ + seq = best_seq(__le32_to_cpu(nsindex[0]->seq), + __le32_to_cpu(nsindex[1]->seq)); + if (seq == (__le32_to_cpu(nsindex[1]->seq) & NSINDEX_SEQ_MASK)) + return 1; + else + return 0; + break; + } + + return -1; +} + +void nd_label_copy(struct nvdimm_drvdata *ndd, struct nd_namespace_index *dst, + struct nd_namespace_index *src) +{ + if (dst && src) + /* pass */; + else + return; + + memcpy(dst, src, sizeof_namespace_index(ndd)); +} + +static struct nd_namespace_label *nd_label_base(struct nvdimm_drvdata *ndd) +{ + void *base = to_namespace_index(ndd, 0); + + return base + 2 * sizeof_namespace_index(ndd); +} + +#define for_each_clear_bit_le(bit, addr, size) \ + for ((bit) = find_next_zero_bit_le((addr), (size), 0); \ + (bit) < (size); \ + (bit) = find_next_zero_bit_le((addr), (size), (bit) + 1)) + +/** + * preamble_current - common variable initialization for nd_label_* routines + * @ndd: dimm container for the relevant label set + * @nsindex_out: on return set to the currently active namespace index + * @free: on return set to the free label bitmap in the index + * @nslot: on return set to the number of slots in the label space + */ +static bool preamble_current(struct nvdimm_drvdata *ndd, + struct nd_namespace_index **nsindex_out, + unsigned long **free, u32 *nslot) +{ + struct nd_namespace_index *nsindex; + + nsindex = to_current_namespace_index(ndd); + if (nsindex == NULL) + return false; + + *free = (unsigned long *) nsindex->free; + *nslot = __le32_to_cpu(nsindex->nslot); + *nsindex_out = nsindex; + + return true; +} + +static char *nd_label_gen_id(struct nd_label_id *label_id, u8 *uuid, u32 flags) +{ + if (!label_id || !uuid) + return NULL; + snprintf(label_id->id, ND_LABEL_ID_SIZE, "%s-%pUb", + flags & NSLABEL_FLAG_LOCAL ? "blk" : "pmem", uuid); + return label_id->id; +} + +static bool slot_valid(struct nd_namespace_label *nd_label, u32 slot) +{ + /* check that we are written where we expect to be written */ + if (slot != __le32_to_cpu(nd_label->slot)) + return false; + + /* check that DPA allocations are page aligned */ + if ((__le64_to_cpu(nd_label->dpa) + | __le64_to_cpu(nd_label->rawsize)) % SZ_4K) + return false; + + return true; +} + +int nd_label_reserve_dpa(struct nvdimm_drvdata *ndd) +{ + struct nd_namespace_index *nsindex; + unsigned long *free; + u32 nslot, slot; + + if (!preamble_current(ndd, &nsindex, &free, &nslot)) + return 0; /* no label, nothing to reserve */ + + for_each_clear_bit_le(slot, free, nslot) { + struct nd_namespace_label *nd_label; + struct nd_region *nd_region = NULL; + u8 label_uuid[NSLABEL_UUID_LEN]; + struct nd_label_id label_id; + struct resource *res; + u32 flags; + + nd_label = nd_label_base(ndd) + slot; + + if (!slot_valid(nd_label, slot)) + continue; + + memcpy(label_uuid, nd_label->uuid, NSLABEL_UUID_LEN); + flags = __le32_to_cpu(nd_label->flags); + nd_label_gen_id(&label_id, label_uuid, flags); + res = nvdimm_allocate_dpa(ndd, &label_id, + __le64_to_cpu(nd_label->dpa), + __le64_to_cpu(nd_label->rawsize)); + nd_dbg_dpa(nd_region, ndd, res, "reserve\n"); + if (!res) + return -EBUSY; + } + + return 0; +} diff --git a/drivers/nvdimm/label.h b/drivers/nvdimm/label.h new file mode 100644 index 000000000000..d6aa0d5c6b4e --- /dev/null +++ b/drivers/nvdimm/label.h @@ -0,0 +1,128 @@ +/* + * Copyright(c) 2013-2015 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +#ifndef __LABEL_H__ +#define __LABEL_H__ + +#include +#include +#include + +enum { + NSINDEX_SIG_LEN = 16, + NSINDEX_ALIGN = 256, + NSINDEX_SEQ_MASK = 0x3, + NSLABEL_UUID_LEN = 16, + NSLABEL_NAME_LEN = 64, + NSLABEL_FLAG_ROLABEL = 0x1, /* read-only label */ + NSLABEL_FLAG_LOCAL = 0x2, /* DIMM-local namespace */ + NSLABEL_FLAG_BTT = 0x4, /* namespace contains a BTT */ + NSLABEL_FLAG_UPDATING = 0x8, /* label being updated */ + BTT_ALIGN = 4096, /* all btt structures */ + BTTINFO_SIG_LEN = 16, + BTTINFO_UUID_LEN = 16, + BTTINFO_FLAG_ERROR = 0x1, /* error state (read-only) */ + BTTINFO_MAJOR_VERSION = 1, + ND_LABEL_MIN_SIZE = 512 * 129, /* see sizeof_namespace_index() */ + ND_LABEL_ID_SIZE = 50, +}; + +static const char NSINDEX_SIGNATURE[] = "NAMESPACE_INDEX\0"; + +/** + * struct nd_namespace_index - label set superblock + * @sig: NAMESPACE_INDEX\0 + * @flags: placeholder + * @seq: sequence number for this index + * @myoff: offset of this index in label area + * @mysize: size of this index struct + * @otheroff: offset of other index + * @labeloff: offset of first label slot + * @nslot: total number of label slots + * @major: label area major version + * @minor: label area minor version + * @checksum: fletcher64 of all fields + * @free[0]: bitmap, nlabel bits + * + * The size of free[] is rounded up so the total struct size is a + * multiple of NSINDEX_ALIGN bytes. Any bits this allocates beyond + * nlabel bits must be zero. + */ +struct nd_namespace_index { + u8 sig[NSINDEX_SIG_LEN]; + __le32 flags; + __le32 seq; + __le64 myoff; + __le64 mysize; + __le64 otheroff; + __le64 labeloff; + __le32 nslot; + __le16 major; + __le16 minor; + __le64 checksum; + u8 free[0]; +}; + +/** + * struct nd_namespace_label - namespace superblock + * @uuid: UUID per RFC 4122 + * @name: optional name (NULL-terminated) + * @flags: see NSLABEL_FLAG_* + * @nlabel: num labels to describe this ns + * @position: labels position in set + * @isetcookie: interleave set cookie + * @lbasize: LBA size in bytes or 0 for pmem + * @dpa: DPA of NVM range on this DIMM + * @rawsize: size of namespace + * @slot: slot of this label in label area + * @unused: must be zero + */ +struct nd_namespace_label { + u8 uuid[NSLABEL_UUID_LEN]; + u8 name[NSLABEL_NAME_LEN]; + __le32 flags; + __le16 nlabel; + __le16 position; + __le64 isetcookie; + __le64 lbasize; + __le64 dpa; + __le64 rawsize; + __le32 slot; + __le32 unused; +}; + +/** + * struct nd_label_id - identifier string for dpa allocation + * @id: "{blk|pmem}-" + */ +struct nd_label_id { + char id[ND_LABEL_ID_SIZE]; +}; + +/* + * If the 'best' index is invalid, so is the 'next' index. 
Otherwise, + * the next index is MOD(index+1, 2) + */ +static inline int nd_label_next_nsindex(int index) +{ + if (index < 0) + return -1; + + return (index + 1) % 2; +} + +struct nvdimm_drvdata; +int nd_label_validate(struct nvdimm_drvdata *ndd); +void nd_label_copy(struct nvdimm_drvdata *ndd, struct nd_namespace_index *dst, + struct nd_namespace_index *src); +size_t sizeof_namespace_index(struct nvdimm_drvdata *ndd); +#endif /* __LABEL_H__ */ diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h index 0285e4588b03..401fa0d5b6ea 100644 --- a/drivers/nvdimm/nd.h +++ b/drivers/nvdimm/nd.h @@ -16,11 +16,15 @@ #include #include #include +#include "label.h" struct nvdimm_drvdata { struct device *dev; + int nsindex_size; struct nd_cmd_get_config_size nsarea; void *data; + int ns_current, ns_next; + struct resource dpa; }; struct nd_region_namespaces { @@ -28,6 +32,37 @@ struct nd_region_namespaces { int active; }; +static inline struct nd_namespace_index *to_namespace_index( + struct nvdimm_drvdata *ndd, int i) +{ + if (i < 0) + return NULL; + + return ndd->data + sizeof_namespace_index(ndd) * i; +} + +static inline struct nd_namespace_index *to_current_namespace_index( + struct nvdimm_drvdata *ndd) +{ + return to_namespace_index(ndd, ndd->ns_current); +} + +static inline struct nd_namespace_index *to_next_namespace_index( + struct nvdimm_drvdata *ndd) +{ + return to_namespace_index(ndd, ndd->ns_next); +} + +#define nd_dbg_dpa(r, d, res, fmt, arg...) \ + dev_dbg((r) ? &(r)->dev : (d)->dev, "%s: %.13s: %#llx @ %#llx " fmt, \ + (r) ? dev_name((d)->dev) : "", res ? res->name : "null", \ + (unsigned long long) (res ? resource_size(res) : 0), \ + (unsigned long long) (res ? res->start : 0), ##arg) + +#define for_each_dpa_resource_safe(ndd, res, next) \ + for (res = (ndd)->dpa.child, next = res ? res->sibling : NULL; \ + res; res = next, next = next ? next->sibling : NULL) + struct nd_region { struct device dev; u16 ndr_mappings; @@ -39,6 +74,15 @@ struct nd_region { struct nd_mapping mapping[0]; }; +/* + * Lookup next in the repeating sequence of 01, 10, and 11. + */ +static inline unsigned nd_inc_seq(unsigned seq) +{ + static const unsigned next[] = { 0, 2, 3, 1 }; + + return next[seq & 3]; +} enum nd_async_mode { ND_SYNC, ND_ASYNC, @@ -58,4 +102,9 @@ int nd_region_register_namespaces(struct nd_region *nd_region, int *err); void nvdimm_bus_lock(struct device *dev); void nvdimm_bus_unlock(struct device *dev); bool is_nvdimm_bus_locked(struct device *dev); +int nd_label_reserve_dpa(struct nvdimm_drvdata *ndd); +void nvdimm_free_dpa(struct nvdimm_drvdata *ndd, struct resource *res); +struct resource *nvdimm_allocate_dpa(struct nvdimm_drvdata *ndd, + struct nd_label_id *label_id, resource_size_t start, + resource_size_t n); #endif /* __ND_H__ */ diff --git a/include/uapi/linux/ndctl.h b/include/uapi/linux/ndctl.h index 174b6371dcc1..1357a87b8714 100644 --- a/include/uapi/linux/ndctl.h +++ b/include/uapi/linux/ndctl.h @@ -175,7 +175,6 @@ static inline const char *nvdimm_cmd_name(unsigned cmd) #define ND_IOCTL_ARS_STATUS _IOWR(ND_IOCTL, ND_CMD_ARS_STATUS,\ struct nd_cmd_ars_status) - #define ND_DEVICE_DIMM 1 /* nd_dimm: container for "config data" */ #define ND_DEVICE_REGION_PMEM 2 /* nd_region: (parent of PMEM namespaces) */ #define ND_DEVICE_REGION_BLK 3 /* nd_region: (parent of BLK namespaces) */ -- cgit v1.2.3 From bf9bccc14c05dae8caba29df6187c731710f5380 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 17 Jun 2015 17:14:46 -0400 Subject: libnvdimm: pmem label sets and namespace instantiation. 
A complete label set is a PMEM-label per-dimm per-interleave-set where all the UUIDs match and the interleave set cookie matches the hosting interleave set. Present sysfs attributes for manipulation of a PMEM-namespace's 'alt_name', 'uuid', and 'size' attributes. A later patch will make these settings persistent by writing back the label. Note that PMEM allocations grow forwards from the start of an interleave set (lowest dimm-physical-address (DPA)). BLK-namespaces that alias with a PMEM interleave set will grow allocations backward from the highest DPA. Cc: Greg KH Cc: Neil Brown Acked-by: Christoph Hellwig Signed-off-by: Dan Williams --- drivers/nvdimm/bus.c | 8 +- drivers/nvdimm/core.c | 64 +++ drivers/nvdimm/dimm.c | 21 +- drivers/nvdimm/dimm_devs.c | 137 ++++++ drivers/nvdimm/label.c | 55 ++- drivers/nvdimm/label.h | 2 + drivers/nvdimm/namespace_devs.c | 1002 ++++++++++++++++++++++++++++++++++++++- drivers/nvdimm/nd-core.h | 12 + drivers/nvdimm/nd.h | 17 + drivers/nvdimm/pmem.c | 20 +- drivers/nvdimm/region.c | 3 + drivers/nvdimm/region_devs.c | 158 +++++- include/linux/libnvdimm.h | 10 + include/linux/nd.h | 24 + include/uapi/linux/ndctl.h | 4 + 15 files changed, 1506 insertions(+), 31 deletions(-) (limited to 'include/uapi/linux') diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c index ffb43cada625..fddc3f2a8f80 100644 --- a/drivers/nvdimm/bus.c +++ b/drivers/nvdimm/bus.c @@ -97,6 +97,8 @@ static int nvdimm_bus_probe(struct device *dev) rc = nd_drv->probe(dev); if (rc == 0) nd_region_probe_success(nvdimm_bus, dev); + else + nd_region_disable(nvdimm_bus, dev); nvdimm_bus_probe_end(nvdimm_bus); dev_dbg(&nvdimm_bus->dev, "%s.probe(%s) = %d\n", dev->driver->name, @@ -381,8 +383,10 @@ u32 nd_cmd_out_size(struct nvdimm *nvdimm, int cmd, } EXPORT_SYMBOL_GPL(nd_cmd_out_size); -static void wait_nvdimm_bus_probe_idle(struct nvdimm_bus *nvdimm_bus) +void wait_nvdimm_bus_probe_idle(struct device *dev) { + struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev); + do { if (nvdimm_bus->probe_active == 0) break; @@ -402,7 +406,7 @@ static int nd_cmd_clear_to_send(struct nvdimm *nvdimm, unsigned int cmd) return 0; nvdimm_bus = walk_to_nvdimm_bus(&nvdimm->dev); - wait_nvdimm_bus_probe_idle(nvdimm_bus); + wait_nvdimm_bus_probe_idle(&nvdimm_bus->dev); if (atomic_read(&nvdimm->busy)) return -EBUSY; diff --git a/drivers/nvdimm/core.c b/drivers/nvdimm/core.c index 7806eaaf4707..cf99cce8ef33 100644 --- a/drivers/nvdimm/core.c +++ b/drivers/nvdimm/core.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -109,6 +110,69 @@ struct nvdimm_bus *walk_to_nvdimm_bus(struct device *nd_dev) return NULL; } +static bool is_uuid_sep(char sep) +{ + if (sep == '\n' || sep == '-' || sep == ':' || sep == '\0') + return true; + return false; +} + +static int nd_uuid_parse(struct device *dev, u8 *uuid_out, const char *buf, + size_t len) +{ + const char *str = buf; + u8 uuid[16]; + int i; + + for (i = 0; i < 16; i++) { + if (!isxdigit(str[0]) || !isxdigit(str[1])) { + dev_dbg(dev, "%s: pos: %d buf[%zd]: %c buf[%zd]: %c\n", + __func__, i, str - buf, str[0], + str + 1 - buf, str[1]); + return -EINVAL; + } + + uuid[i] = (hex_to_bin(str[0]) << 4) | hex_to_bin(str[1]); + str += 2; + if (is_uuid_sep(*str)) + str++; + } + + memcpy(uuid_out, uuid, sizeof(uuid)); + return 0; +} + +/** + * nd_uuid_store: common implementation for writing 'uuid' sysfs attributes + * @dev: container device for the uuid property + * @uuid_out: uuid buffer to replace + * @buf: raw sysfs buffer to parse + * + * 
Enforce that uuids can only be changed while the device is disabled + * (driver detached) + * LOCKING: expects device_lock() is held on entry + */ +int nd_uuid_store(struct device *dev, u8 **uuid_out, const char *buf, + size_t len) +{ + u8 uuid[16]; + int rc; + + if (dev->driver) + return -EBUSY; + + rc = nd_uuid_parse(dev, uuid, buf, len); + if (rc) + return rc; + + kfree(*uuid_out); + *uuid_out = kmemdup(uuid, sizeof(uuid), GFP_KERNEL); + if (!(*uuid_out)) + return -ENOMEM; + + return 0; +} + static ssize_t commands_show(struct device *dev, struct device_attribute *attr, char *buf) { diff --git a/drivers/nvdimm/dimm.c b/drivers/nvdimm/dimm.c index 2df97c3c3b34..71d12bb67339 100644 --- a/drivers/nvdimm/dimm.c +++ b/drivers/nvdimm/dimm.c @@ -21,18 +21,6 @@ #include "label.h" #include "nd.h" -static void free_data(struct nvdimm_drvdata *ndd) -{ - if (!ndd) - return; - - if (ndd->data && is_vmalloc_addr(ndd->data)) - vfree(ndd->data); - else - kfree(ndd->data); - kfree(ndd); -} - static int nvdimm_probe(struct device *dev) { struct nvdimm_drvdata *ndd; @@ -49,6 +37,8 @@ static int nvdimm_probe(struct device *dev) ndd->dpa.start = 0; ndd->dpa.end = -1; ndd->dev = dev; + get_device(dev); + kref_init(&ndd->kref); rc = nvdimm_init_nsarea(ndd); if (rc) @@ -74,21 +64,18 @@ static int nvdimm_probe(struct device *dev) return 0; err: - free_data(ndd); + put_ndd(ndd); return rc; } static int nvdimm_remove(struct device *dev) { struct nvdimm_drvdata *ndd = dev_get_drvdata(dev); - struct resource *res, *_r; nvdimm_bus_lock(dev); dev_set_drvdata(dev, NULL); - for_each_dpa_resource_safe(ndd, res, _r) - nvdimm_free_dpa(ndd, res); nvdimm_bus_unlock(dev); - free_data(ndd); + put_ndd(ndd); return 0; } diff --git a/drivers/nvdimm/dimm_devs.c b/drivers/nvdimm/dimm_devs.c index d2ef02e4be6c..b55acef179ba 100644 --- a/drivers/nvdimm/dimm_devs.c +++ b/drivers/nvdimm/dimm_devs.c @@ -159,6 +159,48 @@ struct nvdimm *to_nvdimm(struct device *dev) } EXPORT_SYMBOL_GPL(to_nvdimm); +struct nvdimm_drvdata *to_ndd(struct nd_mapping *nd_mapping) +{ + struct nvdimm *nvdimm = nd_mapping->nvdimm; + + WARN_ON_ONCE(!is_nvdimm_bus_locked(&nvdimm->dev)); + + return dev_get_drvdata(&nvdimm->dev); +} +EXPORT_SYMBOL(to_ndd); + +void nvdimm_drvdata_release(struct kref *kref) +{ + struct nvdimm_drvdata *ndd = container_of(kref, typeof(*ndd), kref); + struct device *dev = ndd->dev; + struct resource *res, *_r; + + dev_dbg(dev, "%s\n", __func__); + + nvdimm_bus_lock(dev); + for_each_dpa_resource_safe(ndd, res, _r) + nvdimm_free_dpa(ndd, res); + nvdimm_bus_unlock(dev); + + if (ndd->data && is_vmalloc_addr(ndd->data)) + vfree(ndd->data); + else + kfree(ndd->data); + kfree(ndd); + put_device(dev); +} + +void get_ndd(struct nvdimm_drvdata *ndd) +{ + kref_get(&ndd->kref); +} + +void put_ndd(struct nvdimm_drvdata *ndd) +{ + if (ndd) + kref_put(&ndd->kref, nvdimm_drvdata_release); +} + const char *nvdimm_name(struct nvdimm *nvdimm) { return dev_name(&nvdimm->dev); @@ -247,6 +289,83 @@ struct nvdimm *nvdimm_create(struct nvdimm_bus *nvdimm_bus, void *provider_data, } EXPORT_SYMBOL_GPL(nvdimm_create); +/** + * nd_pmem_available_dpa - for the given dimm+region account unallocated dpa + * @nd_mapping: container of dpa-resource-root + labels + * @nd_region: constrain available space check to this reference region + * @overlap: calculate available space assuming this level of overlap + * + * Validate that a PMEM label, if present, aligns with the start of an + * interleave set and truncate the available size at the lowest BLK + * overlap point. 
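+ *
+ * Returns the number of bytes still available for PMEM on this
+ * mapping, or 0 when the dimm is disabled, fully allocated, or its
+ * allocations are inconsistent.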
+ * + * The expectation is that this routine is called multiple times as it + * probes for the largest BLK encroachment for any single member DIMM of + * the interleave set. Once that value is determined the PMEM-limit for + * the set can be established. + */ +resource_size_t nd_pmem_available_dpa(struct nd_region *nd_region, + struct nd_mapping *nd_mapping, resource_size_t *overlap) +{ + resource_size_t map_start, map_end, busy = 0, available, blk_start; + struct nvdimm_drvdata *ndd = to_ndd(nd_mapping); + struct resource *res; + const char *reason; + + if (!ndd) + return 0; + + map_start = nd_mapping->start; + map_end = map_start + nd_mapping->size - 1; + blk_start = max(map_start, map_end + 1 - *overlap); + for_each_dpa_resource(ndd, res) + if (res->start >= map_start && res->start < map_end) { + if (strncmp(res->name, "blk", 3) == 0) + blk_start = min(blk_start, res->start); + else if (res->start != map_start) { + reason = "misaligned to iset"; + goto err; + } else { + if (busy) { + reason = "duplicate overlapping PMEM reservations?"; + goto err; + } + busy += resource_size(res); + continue; + } + } else if (res->end >= map_start && res->end <= map_end) { + if (strncmp(res->name, "blk", 3) == 0) { + /* + * If a BLK allocation overlaps the start of + * PMEM the entire interleave set may now only + * be used for BLK. + */ + blk_start = map_start; + } else { + reason = "misaligned to iset"; + goto err; + } + } else if (map_start > res->start && map_start < res->end) { + /* total eclipse of the mapping */ + busy += nd_mapping->size; + blk_start = map_start; + } + + *overlap = map_end + 1 - blk_start; + available = blk_start - map_start; + if (busy < available) + return available - busy; + return 0; + + err: + /* + * Something is wrong, PMEM must align with the start of the + * interleave set, and there can only be one allocation per set. 
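+ * Log the offending resource via nd_dbg_dpa() and report zero
+ * available capacity.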
+ */ + nd_dbg_dpa(nd_region, ndd, res, "%s\n", reason); + return 0; +} + void nvdimm_free_dpa(struct nvdimm_drvdata *ndd, struct resource *res) { WARN_ON_ONCE(!is_nvdimm_bus_locked(ndd->dev)); @@ -271,6 +390,24 @@ struct resource *nvdimm_allocate_dpa(struct nvdimm_drvdata *ndd, return res; } +/** + * nvdimm_allocated_dpa - sum up the dpa currently allocated to this label_id + * @nvdimm: container of dpa-resource-root + labels + * @label_id: dpa resource name of the form {pmem|blk}- + */ +resource_size_t nvdimm_allocated_dpa(struct nvdimm_drvdata *ndd, + struct nd_label_id *label_id) +{ + resource_size_t allocated = 0; + struct resource *res; + + for_each_dpa_resource(ndd, res) + if (strcmp(res->name, label_id->id) == 0) + allocated += resource_size(res); + + return allocated; +} + static int count_dimms(struct device *dev, void *c) { int *count = c; diff --git a/drivers/nvdimm/label.c b/drivers/nvdimm/label.c index db5d7492dc8d..1a3bcd27a57a 100644 --- a/drivers/nvdimm/label.c +++ b/drivers/nvdimm/label.c @@ -230,7 +230,7 @@ static bool preamble_current(struct nvdimm_drvdata *ndd, return true; } -static char *nd_label_gen_id(struct nd_label_id *label_id, u8 *uuid, u32 flags) +char *nd_label_gen_id(struct nd_label_id *label_id, u8 *uuid, u32 flags) { if (!label_id || !uuid) return NULL; @@ -288,3 +288,56 @@ int nd_label_reserve_dpa(struct nvdimm_drvdata *ndd) return 0; } + +int nd_label_active_count(struct nvdimm_drvdata *ndd) +{ + struct nd_namespace_index *nsindex; + unsigned long *free; + u32 nslot, slot; + int count = 0; + + if (!preamble_current(ndd, &nsindex, &free, &nslot)) + return 0; + + for_each_clear_bit_le(slot, free, nslot) { + struct nd_namespace_label *nd_label; + + nd_label = nd_label_base(ndd) + slot; + + if (!slot_valid(nd_label, slot)) { + u32 label_slot = __le32_to_cpu(nd_label->slot); + u64 size = __le64_to_cpu(nd_label->rawsize); + u64 dpa = __le64_to_cpu(nd_label->dpa); + + dev_dbg(ndd->dev, + "%s: slot%d invalid slot: %d dpa: %llx size: %llx\n", + __func__, slot, label_slot, dpa, size); + continue; + } + count++; + } + return count; +} + +struct nd_namespace_label *nd_label_active(struct nvdimm_drvdata *ndd, int n) +{ + struct nd_namespace_index *nsindex; + unsigned long *free; + u32 nslot, slot; + + if (!preamble_current(ndd, &nsindex, &free, &nslot)) + return NULL; + + for_each_clear_bit_le(slot, free, nslot) { + struct nd_namespace_label *nd_label; + + nd_label = nd_label_base(ndd) + slot; + if (!slot_valid(nd_label, slot)) + continue; + + if (n-- == 0) + return nd_label_base(ndd) + slot; + } + + return NULL; +} diff --git a/drivers/nvdimm/label.h b/drivers/nvdimm/label.h index d6aa0d5c6b4e..8ee1376526c7 100644 --- a/drivers/nvdimm/label.h +++ b/drivers/nvdimm/label.h @@ -125,4 +125,6 @@ int nd_label_validate(struct nvdimm_drvdata *ndd); void nd_label_copy(struct nvdimm_drvdata *ndd, struct nd_namespace_index *dst, struct nd_namespace_index *src); size_t sizeof_namespace_index(struct nvdimm_drvdata *ndd); +int nd_label_active_count(struct nvdimm_drvdata *ndd); +struct nd_namespace_label *nd_label_active(struct nvdimm_drvdata *ndd, int n); #endif /* __LABEL_H__ */ diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c index 4f653d1e61ad..5d81032fcfc5 100644 --- a/drivers/nvdimm/namespace_devs.c +++ b/drivers/nvdimm/namespace_devs.c @@ -14,6 +14,7 @@ #include #include #include +#include "nd-core.h" #include "nd.h" static void namespace_io_release(struct device *dev) @@ -23,11 +24,50 @@ static void namespace_io_release(struct device *dev) 
kfree(nsio); } +static void namespace_pmem_release(struct device *dev) +{ + struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev); + + kfree(nspm->alt_name); + kfree(nspm->uuid); + kfree(nspm); +} + +static void namespace_blk_release(struct device *dev) +{ + /* TODO: blk namespace support */ +} + static struct device_type namespace_io_device_type = { .name = "nd_namespace_io", .release = namespace_io_release, }; +static struct device_type namespace_pmem_device_type = { + .name = "nd_namespace_pmem", + .release = namespace_pmem_release, +}; + +static struct device_type namespace_blk_device_type = { + .name = "nd_namespace_blk", + .release = namespace_blk_release, +}; + +static bool is_namespace_pmem(struct device *dev) +{ + return dev ? dev->type == &namespace_pmem_device_type : false; +} + +static bool is_namespace_blk(struct device *dev) +{ + return dev ? dev->type == &namespace_blk_device_type : false; +} + +static bool is_namespace_io(struct device *dev) +{ + return dev ? dev->type == &namespace_io_device_type : false; +} + static ssize_t nstype_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -37,13 +77,676 @@ static ssize_t nstype_show(struct device *dev, } static DEVICE_ATTR_RO(nstype); +static ssize_t __alt_name_store(struct device *dev, const char *buf, + const size_t len) +{ + char *input, *pos, *alt_name, **ns_altname; + ssize_t rc; + + if (is_namespace_pmem(dev)) { + struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev); + + ns_altname = &nspm->alt_name; + } else if (is_namespace_blk(dev)) { + /* TODO: blk namespace support */ + return -ENXIO; + } else + return -ENXIO; + + if (dev->driver) + return -EBUSY; + + input = kmemdup(buf, len + 1, GFP_KERNEL); + if (!input) + return -ENOMEM; + + input[len] = '\0'; + pos = strim(input); + if (strlen(pos) + 1 > NSLABEL_NAME_LEN) { + rc = -EINVAL; + goto out; + } + + alt_name = kzalloc(NSLABEL_NAME_LEN, GFP_KERNEL); + if (!alt_name) { + rc = -ENOMEM; + goto out; + } + kfree(*ns_altname); + *ns_altname = alt_name; + sprintf(*ns_altname, "%s", pos); + rc = len; + +out: + kfree(input); + return rc; +} + +static ssize_t alt_name_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t len) +{ + ssize_t rc; + + device_lock(dev); + nvdimm_bus_lock(dev); + wait_nvdimm_bus_probe_idle(dev); + rc = __alt_name_store(dev, buf, len); + dev_dbg(dev, "%s: %s(%zd)\n", __func__, rc < 0 ? "fail " : "", rc); + nvdimm_bus_unlock(dev); + device_unlock(dev); + + return rc; +} + +static ssize_t alt_name_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + char *ns_altname; + + if (is_namespace_pmem(dev)) { + struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev); + + ns_altname = nspm->alt_name; + } else if (is_namespace_blk(dev)) { + /* TODO: blk namespace support */ + return -ENXIO; + } else + return -ENXIO; + + return sprintf(buf, "%s\n", ns_altname ? 
ns_altname : ""); +} +static DEVICE_ATTR_RW(alt_name); + +static int scan_free(struct nd_region *nd_region, + struct nd_mapping *nd_mapping, struct nd_label_id *label_id, + resource_size_t n) +{ + bool is_blk = strncmp(label_id->id, "blk", 3) == 0; + struct nvdimm_drvdata *ndd = to_ndd(nd_mapping); + int rc = 0; + + while (n) { + struct resource *res, *last; + resource_size_t new_start; + + last = NULL; + for_each_dpa_resource(ndd, res) + if (strcmp(res->name, label_id->id) == 0) + last = res; + res = last; + if (!res) + return 0; + + if (n >= resource_size(res)) { + n -= resource_size(res); + nd_dbg_dpa(nd_region, ndd, res, "delete %d\n", rc); + nvdimm_free_dpa(ndd, res); + /* retry with last resource deleted */ + continue; + } + + /* + * Keep BLK allocations relegated to high DPA as much as + * possible + */ + if (is_blk) + new_start = res->start + n; + else + new_start = res->start; + + rc = adjust_resource(res, new_start, resource_size(res) - n); + nd_dbg_dpa(nd_region, ndd, res, "shrink %d\n", rc); + break; + } + + return rc; +} + +/** + * shrink_dpa_allocation - for each dimm in region free n bytes for label_id + * @nd_region: the set of dimms to reclaim @n bytes from + * @label_id: unique identifier for the namespace consuming this dpa range + * @n: number of bytes per-dimm to release + * + * Assumes resources are ordered. Starting from the end try to + * adjust_resource() the allocation to @n, but if @n is larger than the + * allocation delete it and find the 'new' last allocation in the label + * set. + */ +static int shrink_dpa_allocation(struct nd_region *nd_region, + struct nd_label_id *label_id, resource_size_t n) +{ + int i; + + for (i = 0; i < nd_region->ndr_mappings; i++) { + struct nd_mapping *nd_mapping = &nd_region->mapping[i]; + int rc; + + rc = scan_free(nd_region, nd_mapping, label_id, n); + if (rc) + return rc; + } + + return 0; +} + +static resource_size_t init_dpa_allocation(struct nd_label_id *label_id, + struct nd_region *nd_region, struct nd_mapping *nd_mapping, + resource_size_t n) +{ + bool is_blk = strncmp(label_id->id, "blk", 3) == 0; + struct nvdimm_drvdata *ndd = to_ndd(nd_mapping); + resource_size_t first_dpa; + struct resource *res; + int rc = 0; + + /* allocate blk from highest dpa first */ + if (is_blk) + first_dpa = nd_mapping->start + nd_mapping->size - n; + else + first_dpa = nd_mapping->start; + + /* first resource allocation for this label-id or dimm */ + res = nvdimm_allocate_dpa(ndd, label_id, first_dpa, n); + if (!res) + rc = -EBUSY; + + nd_dbg_dpa(nd_region, ndd, res, "init %d\n", rc); + return rc ? n : 0; +} + +static bool space_valid(bool is_pmem, struct nd_label_id *label_id, + struct resource *res) +{ + /* + * For BLK-space any space is valid, for PMEM-space, it must be + * contiguous with an existing allocation. 
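+ * A NULL @res denotes the free space at the start of a mapping and
+ * is always treated as valid.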
+ */ + if (!is_pmem) + return true; + if (!res || strcmp(res->name, label_id->id) == 0) + return true; + return false; +} + +enum alloc_loc { + ALLOC_ERR = 0, ALLOC_BEFORE, ALLOC_MID, ALLOC_AFTER, +}; + +static resource_size_t scan_allocate(struct nd_region *nd_region, + struct nd_mapping *nd_mapping, struct nd_label_id *label_id, + resource_size_t n) +{ + resource_size_t mapping_end = nd_mapping->start + nd_mapping->size - 1; + bool is_pmem = strncmp(label_id->id, "pmem", 4) == 0; + struct nvdimm_drvdata *ndd = to_ndd(nd_mapping); + const resource_size_t to_allocate = n; + struct resource *res; + int first; + + retry: + first = 0; + for_each_dpa_resource(ndd, res) { + resource_size_t allocate, available = 0, free_start, free_end; + struct resource *next = res->sibling, *new_res = NULL; + enum alloc_loc loc = ALLOC_ERR; + const char *action; + int rc = 0; + + /* ignore resources outside this nd_mapping */ + if (res->start > mapping_end) + continue; + if (res->end < nd_mapping->start) + continue; + + /* space at the beginning of the mapping */ + if (!first++ && res->start > nd_mapping->start) { + free_start = nd_mapping->start; + available = res->start - free_start; + if (space_valid(is_pmem, label_id, NULL)) + loc = ALLOC_BEFORE; + } + + /* space between allocations */ + if (!loc && next) { + free_start = res->start + resource_size(res); + free_end = min(mapping_end, next->start - 1); + if (space_valid(is_pmem, label_id, res) + && free_start < free_end) { + available = free_end + 1 - free_start; + loc = ALLOC_MID; + } + } + + /* space at the end of the mapping */ + if (!loc && !next) { + free_start = res->start + resource_size(res); + free_end = mapping_end; + if (space_valid(is_pmem, label_id, res) + && free_start < free_end) { + available = free_end + 1 - free_start; + loc = ALLOC_AFTER; + } + } + + if (!loc || !available) + continue; + allocate = min(available, n); + switch (loc) { + case ALLOC_BEFORE: + if (strcmp(res->name, label_id->id) == 0) { + /* adjust current resource up */ + if (is_pmem) + return n; + rc = adjust_resource(res, res->start - allocate, + resource_size(res) + allocate); + action = "cur grow up"; + } else + action = "allocate"; + break; + case ALLOC_MID: + if (strcmp(next->name, label_id->id) == 0) { + /* adjust next resource up */ + if (is_pmem) + return n; + rc = adjust_resource(next, next->start + - allocate, resource_size(next) + + allocate); + new_res = next; + action = "next grow up"; + } else if (strcmp(res->name, label_id->id) == 0) { + action = "grow down"; + } else + action = "allocate"; + break; + case ALLOC_AFTER: + if (strcmp(res->name, label_id->id) == 0) + action = "grow down"; + else + action = "allocate"; + break; + default: + return n; + } + + if (strcmp(action, "allocate") == 0) { + /* BLK allocate bottom up */ + if (!is_pmem) + free_start += available - allocate; + else if (free_start != nd_mapping->start) + return n; + + new_res = nvdimm_allocate_dpa(ndd, label_id, + free_start, allocate); + if (!new_res) + rc = -EBUSY; + } else if (strcmp(action, "grow down") == 0) { + /* adjust current resource down */ + rc = adjust_resource(res, res->start, resource_size(res) + + allocate); + } + + if (!new_res) + new_res = res; + + nd_dbg_dpa(nd_region, ndd, new_res, "%s(%d) %d\n", + action, loc, rc); + + if (rc) + return n; + + n -= allocate; + if (n) { + /* + * Retry scan with newly inserted resources. 
+ * For example, if we did an ALLOC_BEFORE + * insertion there may also have been space + * available for an ALLOC_AFTER insertion, so we + * need to check this same resource again + */ + goto retry; + } else + return 0; + } + + if (is_pmem && n == to_allocate) + return init_dpa_allocation(label_id, nd_region, nd_mapping, n); + return n; +} + +/** + * grow_dpa_allocation - for each dimm allocate n bytes for @label_id + * @nd_region: the set of dimms to allocate @n more bytes from + * @label_id: unique identifier for the namespace consuming this dpa range + * @n: number of bytes per-dimm to add to the existing allocation + * + * Assumes resources are ordered. For BLK regions, first consume + * BLK-only available DPA free space, then consume PMEM-aliased DPA + * space starting at the highest DPA. For PMEM regions start + * allocations from the start of an interleave set and end at the first + * BLK allocation or the end of the interleave set, whichever comes + * first. + */ +static int grow_dpa_allocation(struct nd_region *nd_region, + struct nd_label_id *label_id, resource_size_t n) +{ + int i; + + for (i = 0; i < nd_region->ndr_mappings; i++) { + struct nd_mapping *nd_mapping = &nd_region->mapping[i]; + int rc; + + rc = scan_allocate(nd_region, nd_mapping, label_id, n); + if (rc) + return rc; + } + + return 0; +} + +static void nd_namespace_pmem_set_size(struct nd_region *nd_region, + struct nd_namespace_pmem *nspm, resource_size_t size) +{ + struct resource *res = &nspm->nsio.res; + + res->start = nd_region->ndr_start; + res->end = nd_region->ndr_start + size - 1; +} + +static ssize_t __size_store(struct device *dev, unsigned long long val) +{ + resource_size_t allocated = 0, available = 0; + struct nd_region *nd_region = to_nd_region(dev->parent); + struct nd_mapping *nd_mapping; + struct nvdimm_drvdata *ndd; + struct nd_label_id label_id; + u32 flags = 0, remainder; + u8 *uuid = NULL; + int rc, i; + + if (dev->driver) + return -EBUSY; + + if (is_namespace_pmem(dev)) { + struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev); + + uuid = nspm->uuid; + } else if (is_namespace_blk(dev)) { + /* TODO: blk namespace support */ + return -ENXIO; + } + + /* + * We need a uuid for the allocation-label and dimm(s) on which + * to store the label. + */ + if (!uuid || nd_region->ndr_mappings == 0) + return -ENXIO; + + div_u64_rem(val, SZ_4K * nd_region->ndr_mappings, &remainder); + if (remainder) { + dev_dbg(dev, "%llu is not %dK aligned\n", val, + (SZ_4K * nd_region->ndr_mappings) / SZ_1K); + return -EINVAL; + } + + nd_label_gen_id(&label_id, uuid, flags); + for (i = 0; i < nd_region->ndr_mappings; i++) { + nd_mapping = &nd_region->mapping[i]; + ndd = to_ndd(nd_mapping); + + /* + * All dimms in an interleave set, or the base dimm for a blk + * region, need to be enabled for the size to be changed. 
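+ * to_ndd() returns NULL when the dimm driver is not attached, so
+ * fail the resize in that case.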
+ */ + if (!ndd) + return -ENXIO; + + allocated += nvdimm_allocated_dpa(ndd, &label_id); + } + available = nd_region_available_dpa(nd_region); + + if (val > available + allocated) + return -ENOSPC; + + if (val == allocated) + return 0; + + val = div_u64(val, nd_region->ndr_mappings); + allocated = div_u64(allocated, nd_region->ndr_mappings); + if (val < allocated) + rc = shrink_dpa_allocation(nd_region, &label_id, + allocated - val); + else + rc = grow_dpa_allocation(nd_region, &label_id, val - allocated); + + if (rc) + return rc; + + if (is_namespace_pmem(dev)) { + struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev); + + nd_namespace_pmem_set_size(nd_region, nspm, + val * nd_region->ndr_mappings); + } + + return rc; +} + +static ssize_t size_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t len) +{ + unsigned long long val; + u8 **uuid = NULL; + int rc; + + rc = kstrtoull(buf, 0, &val); + if (rc) + return rc; + + device_lock(dev); + nvdimm_bus_lock(dev); + wait_nvdimm_bus_probe_idle(dev); + rc = __size_store(dev, val); + + if (is_namespace_pmem(dev)) { + struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev); + + uuid = &nspm->uuid; + } else if (is_namespace_blk(dev)) { + /* TODO: blk namespace support */ + rc = -ENXIO; + } + + if (rc == 0 && val == 0 && uuid) { + /* setting size zero == 'delete namespace' */ + kfree(*uuid); + *uuid = NULL; + } + + dev_dbg(dev, "%s: %llx %s (%d)\n", __func__, val, rc < 0 + ? "fail" : "success", rc); + + nvdimm_bus_unlock(dev); + device_unlock(dev); + + return rc ? rc : len; +} + +static ssize_t size_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + if (is_namespace_pmem(dev)) { + struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev); + + return sprintf(buf, "%llu\n", (unsigned long long) + resource_size(&nspm->nsio.res)); + } else if (is_namespace_blk(dev)) { + /* TODO: blk namespace support */ + return -ENXIO; + } else if (is_namespace_io(dev)) { + struct nd_namespace_io *nsio = to_nd_namespace_io(dev); + + return sprintf(buf, "%llu\n", (unsigned long long) + resource_size(&nsio->res)); + } else + return -ENXIO; +} +static DEVICE_ATTR(size, S_IRUGO, size_show, size_store); + +static ssize_t uuid_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + u8 *uuid; + + if (is_namespace_pmem(dev)) { + struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev); + + uuid = nspm->uuid; + } else if (is_namespace_blk(dev)) { + /* TODO: blk namespace support */ + return -ENXIO; + } else + return -ENXIO; + + if (uuid) + return sprintf(buf, "%pUb\n", uuid); + return sprintf(buf, "\n"); +} + +/** + * namespace_update_uuid - check for a unique uuid and whether we're "renaming" + * @nd_region: parent region so we can updates all dimms in the set + * @dev: namespace type for generating label_id + * @new_uuid: incoming uuid + * @old_uuid: reference to the uuid storage location in the namespace object + */ +static int namespace_update_uuid(struct nd_region *nd_region, + struct device *dev, u8 *new_uuid, u8 **old_uuid) +{ + u32 flags = is_namespace_blk(dev) ? NSLABEL_FLAG_LOCAL : 0; + struct nd_label_id old_label_id; + struct nd_label_id new_label_id; + int i, rc; + + rc = nd_is_uuid_unique(dev, new_uuid) ? 
0 : -EINVAL; + if (rc) { + kfree(new_uuid); + return rc; + } + + if (*old_uuid == NULL) + goto out; + + nd_label_gen_id(&old_label_id, *old_uuid, flags); + nd_label_gen_id(&new_label_id, new_uuid, flags); + for (i = 0; i < nd_region->ndr_mappings; i++) { + struct nd_mapping *nd_mapping = &nd_region->mapping[i]; + struct nvdimm_drvdata *ndd = to_ndd(nd_mapping); + struct resource *res; + + for_each_dpa_resource(ndd, res) + if (strcmp(res->name, old_label_id.id) == 0) + sprintf((void *) res->name, "%s", + new_label_id.id); + } + kfree(*old_uuid); + out: + *old_uuid = new_uuid; + return 0; +} + +static ssize_t uuid_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t len) +{ + struct nd_region *nd_region = to_nd_region(dev->parent); + u8 *uuid = NULL; + u8 **ns_uuid; + ssize_t rc; + + if (is_namespace_pmem(dev)) { + struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev); + + ns_uuid = &nspm->uuid; + } else if (is_namespace_blk(dev)) { + /* TODO: blk namespace support */ + return -ENXIO; + } else + return -ENXIO; + + device_lock(dev); + nvdimm_bus_lock(dev); + wait_nvdimm_bus_probe_idle(dev); + rc = nd_uuid_store(dev, &uuid, buf, len); + if (rc >= 0) + rc = namespace_update_uuid(nd_region, dev, uuid, ns_uuid); + dev_dbg(dev, "%s: result: %zd wrote: %s%s", __func__, + rc, buf, buf[len - 1] == '\n' ? "" : "\n"); + nvdimm_bus_unlock(dev); + device_unlock(dev); + + return rc ? rc : len; +} +static DEVICE_ATTR_RW(uuid); + +static ssize_t resource_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct resource *res; + + if (is_namespace_pmem(dev)) { + struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev); + + res = &nspm->nsio.res; + } else if (is_namespace_io(dev)) { + struct nd_namespace_io *nsio = to_nd_namespace_io(dev); + + res = &nsio->res; + } else + return -ENXIO; + + /* no address to convey if the namespace has no allocation */ + if (resource_size(res) == 0) + return -ENXIO; + return sprintf(buf, "%#llx\n", (unsigned long long) res->start); +} +static DEVICE_ATTR_RO(resource); + static struct attribute *nd_namespace_attributes[] = { &dev_attr_nstype.attr, + &dev_attr_size.attr, + &dev_attr_uuid.attr, + &dev_attr_resource.attr, + &dev_attr_alt_name.attr, NULL, }; +static umode_t namespace_visible(struct kobject *kobj, + struct attribute *a, int n) +{ + struct device *dev = container_of(kobj, struct device, kobj); + + if (a == &dev_attr_resource.attr) { + if (is_namespace_blk(dev)) + return 0; + return a->mode; + } + + if (is_namespace_pmem(dev) || is_namespace_blk(dev)) { + if (a == &dev_attr_size.attr) + return S_IWUSR | S_IRUGO; + return a->mode; + } + + if (a == &dev_attr_nstype.attr || a == &dev_attr_size.attr) + return a->mode; + + return 0; +} + static struct attribute_group nd_namespace_attribute_group = { .attrs = nd_namespace_attributes, + .is_visible = namespace_visible, }; static const struct attribute_group *nd_namespace_attribute_groups[] = { @@ -81,23 +784,318 @@ static struct device **create_namespace_io(struct nd_region *nd_region) return devs; } +static bool has_uuid_at_pos(struct nd_region *nd_region, u8 *uuid, + u64 cookie, u16 pos) +{ + struct nd_namespace_label *found = NULL; + int i; + + for (i = 0; i < nd_region->ndr_mappings; i++) { + struct nd_mapping *nd_mapping = &nd_region->mapping[i]; + struct nd_namespace_label *nd_label; + bool found_uuid = false; + int l; + + for_each_label(l, nd_label, nd_mapping->labels) { + u64 isetcookie = __le64_to_cpu(nd_label->isetcookie); + u16 position = 
__le16_to_cpu(nd_label->position); + u16 nlabel = __le16_to_cpu(nd_label->nlabel); + + if (isetcookie != cookie) + continue; + + if (memcmp(nd_label->uuid, uuid, NSLABEL_UUID_LEN) != 0) + continue; + + if (found_uuid) { + dev_dbg(to_ndd(nd_mapping)->dev, + "%s duplicate entry for uuid\n", + __func__); + return false; + } + found_uuid = true; + if (nlabel != nd_region->ndr_mappings) + continue; + if (position != pos) + continue; + found = nd_label; + break; + } + if (found) + break; + } + return found != NULL; +} + +static int select_pmem_id(struct nd_region *nd_region, u8 *pmem_id) +{ + struct nd_namespace_label *select = NULL; + int i; + + if (!pmem_id) + return -ENODEV; + + for (i = 0; i < nd_region->ndr_mappings; i++) { + struct nd_mapping *nd_mapping = &nd_region->mapping[i]; + struct nd_namespace_label *nd_label; + u64 hw_start, hw_end, pmem_start, pmem_end; + int l; + + for_each_label(l, nd_label, nd_mapping->labels) + if (memcmp(nd_label->uuid, pmem_id, NSLABEL_UUID_LEN) == 0) + break; + + if (!nd_label) { + WARN_ON(1); + return -EINVAL; + } + + select = nd_label; + /* + * Check that this label is compliant with the dpa + * range published in NFIT + */ + hw_start = nd_mapping->start; + hw_end = hw_start + nd_mapping->size; + pmem_start = __le64_to_cpu(select->dpa); + pmem_end = pmem_start + __le64_to_cpu(select->rawsize); + if (pmem_start == hw_start && pmem_end <= hw_end) + /* pass */; + else + return -EINVAL; + + nd_mapping->labels[0] = select; + nd_mapping->labels[1] = NULL; + } + return 0; +} + +/** + * find_pmem_label_set - validate interleave set labelling, retrieve label0 + * @nd_region: region with mappings to validate + */ +static int find_pmem_label_set(struct nd_region *nd_region, + struct nd_namespace_pmem *nspm) +{ + u64 cookie = nd_region_interleave_set_cookie(nd_region); + struct nd_namespace_label *nd_label; + u8 select_id[NSLABEL_UUID_LEN]; + resource_size_t size = 0; + u8 *pmem_id = NULL; + int rc = -ENODEV, l; + u16 i; + + if (cookie == 0) + return -ENXIO; + + /* + * Find a complete set of labels by uuid. By definition we can start + * with any mapping as the reference label + */ + for_each_label(l, nd_label, nd_region->mapping[0].labels) { + u64 isetcookie = __le64_to_cpu(nd_label->isetcookie); + + if (isetcookie != cookie) + continue; + + for (i = 0; nd_region->ndr_mappings; i++) + if (!has_uuid_at_pos(nd_region, nd_label->uuid, + cookie, i)) + break; + if (i < nd_region->ndr_mappings) { + /* + * Give up if we don't find an instance of a + * uuid at each position (from 0 to + * nd_region->ndr_mappings - 1), or if we find a + * dimm with two instances of the same uuid. + */ + rc = -EINVAL; + goto err; + } else if (pmem_id) { + /* + * If there is more than one valid uuid set, we + * need userspace to clean this up. + */ + rc = -EBUSY; + goto err; + } + memcpy(select_id, nd_label->uuid, NSLABEL_UUID_LEN); + pmem_id = select_id; + } + + /* + * Fix up each mapping's 'labels' to have the validated pmem label for + * that position at labels[0], and NULL at labels[1]. In the process, + * check that the namespace aligns with interleave-set. We know + * that it does not overlap with any blk namespaces by virtue of + * the dimm being enabled (i.e. nd_label_reserve_dpa() + * succeeded). 
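+ * select_pmem_id() also verifies that each label's dpa/rawsize range
+ * sits within the mapping published in the NFIT.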
+ */ + rc = select_pmem_id(nd_region, pmem_id); + if (rc) + goto err; + + /* Calculate total size and populate namespace properties from label0 */ + for (i = 0; i < nd_region->ndr_mappings; i++) { + struct nd_mapping *nd_mapping = &nd_region->mapping[i]; + struct nd_namespace_label *label0 = nd_mapping->labels[0]; + + size += __le64_to_cpu(label0->rawsize); + if (__le16_to_cpu(label0->position) != 0) + continue; + WARN_ON(nspm->alt_name || nspm->uuid); + nspm->alt_name = kmemdup((void __force *) label0->name, + NSLABEL_NAME_LEN, GFP_KERNEL); + nspm->uuid = kmemdup((void __force *) label0->uuid, + NSLABEL_UUID_LEN, GFP_KERNEL); + } + + if (!nspm->alt_name || !nspm->uuid) { + rc = -ENOMEM; + goto err; + } + + nd_namespace_pmem_set_size(nd_region, nspm, size); + + return 0; + err: + switch (rc) { + case -EINVAL: + dev_dbg(&nd_region->dev, "%s: invalid label(s)\n", __func__); + break; + case -ENODEV: + dev_dbg(&nd_region->dev, "%s: label not found\n", __func__); + break; + default: + dev_dbg(&nd_region->dev, "%s: unexpected err: %d\n", + __func__, rc); + break; + } + return rc; +} + +static struct device **create_namespace_pmem(struct nd_region *nd_region) +{ + struct nd_namespace_pmem *nspm; + struct device *dev, **devs; + struct resource *res; + int rc; + + nspm = kzalloc(sizeof(*nspm), GFP_KERNEL); + if (!nspm) + return NULL; + + dev = &nspm->nsio.dev; + dev->type = &namespace_pmem_device_type; + dev->parent = &nd_region->dev; + res = &nspm->nsio.res; + res->name = dev_name(&nd_region->dev); + res->flags = IORESOURCE_MEM; + rc = find_pmem_label_set(nd_region, nspm); + if (rc == -ENODEV) { + int i; + + /* Pass, try to permit namespace creation... */ + for (i = 0; i < nd_region->ndr_mappings; i++) { + struct nd_mapping *nd_mapping = &nd_region->mapping[i]; + + kfree(nd_mapping->labels); + nd_mapping->labels = NULL; + } + + /* Publish a zero-sized namespace for userspace to configure. */ + nd_namespace_pmem_set_size(nd_region, nspm, 0); + + rc = 0; + } else if (rc) + goto err; + + devs = kcalloc(2, sizeof(struct device *), GFP_KERNEL); + if (!devs) + goto err; + + devs[0] = dev; + return devs; + + err: + namespace_pmem_release(&nspm->nsio.dev); + return NULL; +} + +static int init_active_labels(struct nd_region *nd_region) +{ + int i; + + for (i = 0; i < nd_region->ndr_mappings; i++) { + struct nd_mapping *nd_mapping = &nd_region->mapping[i]; + struct nvdimm_drvdata *ndd = to_ndd(nd_mapping); + struct nvdimm *nvdimm = nd_mapping->nvdimm; + int count, j; + + /* + * If the dimm is disabled then prevent the region from + * being activated if it aliases DPA. 
+ */ + if (!ndd) { + if ((nvdimm->flags & NDD_ALIASING) == 0) + return 0; + dev_dbg(&nd_region->dev, "%s: is disabled, failing probe\n", + dev_name(&nd_mapping->nvdimm->dev)); + return -ENXIO; + } + nd_mapping->ndd = ndd; + atomic_inc(&nvdimm->busy); + get_ndd(ndd); + + count = nd_label_active_count(ndd); + dev_dbg(ndd->dev, "%s: %d\n", __func__, count); + if (!count) + continue; + nd_mapping->labels = kcalloc(count + 1, sizeof(void *), + GFP_KERNEL); + if (!nd_mapping->labels) + return -ENOMEM; + for (j = 0; j < count; j++) { + struct nd_namespace_label *label; + + label = nd_label_active(ndd, j); + nd_mapping->labels[j] = label; + } + } + + return 0; +} + int nd_region_register_namespaces(struct nd_region *nd_region, int *err) { struct device **devs = NULL; - int i; + int i, rc = 0, type; *err = 0; - switch (nd_region_to_nstype(nd_region)) { + nvdimm_bus_lock(&nd_region->dev); + rc = init_active_labels(nd_region); + if (rc) { + nvdimm_bus_unlock(&nd_region->dev); + return rc; + } + + type = nd_region_to_nstype(nd_region); + switch (type) { case ND_DEVICE_NAMESPACE_IO: devs = create_namespace_io(nd_region); break; + case ND_DEVICE_NAMESPACE_PMEM: + devs = create_namespace_pmem(nd_region); + break; default: break; } + nvdimm_bus_unlock(&nd_region->dev); if (!devs) return -ENODEV; + nd_region->ns_seed = devs[0]; for (i = 0; devs[i]; i++) { struct device *dev = devs[i]; diff --git a/drivers/nvdimm/nd-core.h b/drivers/nvdimm/nd-core.h index 6a4b2c066ee7..c6c889292bab 100644 --- a/drivers/nvdimm/nd-core.h +++ b/drivers/nvdimm/nd-core.h @@ -56,4 +56,16 @@ int nvdimm_bus_register_dimms(struct nvdimm_bus *nvdimm_bus); int nvdimm_bus_register_regions(struct nvdimm_bus *nvdimm_bus); int nvdimm_bus_init_interleave_sets(struct nvdimm_bus *nvdimm_bus); int nd_match_dimm(struct device *dev, void *data); +struct nd_label_id; +char *nd_label_gen_id(struct nd_label_id *label_id, u8 *uuid, u32 flags); +bool nd_is_uuid_unique(struct device *dev, u8 *uuid); +struct nd_region; +struct nvdimm_drvdata; +struct nd_mapping; +resource_size_t nd_pmem_available_dpa(struct nd_region *nd_region, + struct nd_mapping *nd_mapping, resource_size_t *overlap); +resource_size_t nd_region_available_dpa(struct nd_region *nd_region); +resource_size_t nvdimm_allocated_dpa(struct nvdimm_drvdata *ndd, + struct nd_label_id *label_id); +void get_ndd(struct nvdimm_drvdata *ndd); #endif /* __ND_CORE_H__ */ diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h index 401fa0d5b6ea..03e610cd9f43 100644 --- a/drivers/nvdimm/nd.h +++ b/drivers/nvdimm/nd.h @@ -16,6 +16,7 @@ #include #include #include +#include #include "label.h" struct nvdimm_drvdata { @@ -25,6 +26,7 @@ struct nvdimm_drvdata { void *data; int ns_current, ns_next; struct resource dpa; + struct kref kref; }; struct nd_region_namespaces { @@ -59,12 +61,19 @@ static inline struct nd_namespace_index *to_next_namespace_index( (unsigned long long) (res ? resource_size(res) : 0), \ (unsigned long long) (res ? res->start : 0), ##arg) +#define for_each_label(l, label, labels) \ + for (l = 0; (label = labels ? labels[l] : NULL); l++) + +#define for_each_dpa_resource(ndd, res) \ + for (res = (ndd)->dpa.child; res; res = res->sibling) + #define for_each_dpa_resource_safe(ndd, res, next) \ for (res = (ndd)->dpa.child, next = res ? res->sibling : NULL; \ res; res = next, next = next ? 
next->sibling : NULL) struct nd_region { struct device dev; + struct device *ns_seed; u16 ndr_mappings; u64 ndr_size; u64 ndr_start; @@ -88,20 +97,28 @@ enum nd_async_mode { ND_ASYNC, }; +void wait_nvdimm_bus_probe_idle(struct device *dev); void nd_device_register(struct device *dev); void nd_device_unregister(struct device *dev, enum nd_async_mode mode); +int nd_uuid_store(struct device *dev, u8 **uuid_out, const char *buf, + size_t len); int __init nvdimm_init(void); int __init nd_region_init(void); void nvdimm_exit(void); void nd_region_exit(void); +struct nvdimm; +struct nvdimm_drvdata *to_ndd(struct nd_mapping *nd_mapping); int nvdimm_init_nsarea(struct nvdimm_drvdata *ndd); int nvdimm_init_config_data(struct nvdimm_drvdata *ndd); struct nd_region *to_nd_region(struct device *dev); int nd_region_to_nstype(struct nd_region *nd_region); int nd_region_register_namespaces(struct nd_region *nd_region, int *err); +u64 nd_region_interleave_set_cookie(struct nd_region *nd_region); void nvdimm_bus_lock(struct device *dev); void nvdimm_bus_unlock(struct device *dev); bool is_nvdimm_bus_locked(struct device *dev); +void nvdimm_drvdata_release(struct kref *kref); +void put_ndd(struct nvdimm_drvdata *ndd); int nd_label_reserve_dpa(struct nvdimm_drvdata *ndd); void nvdimm_free_dpa(struct nvdimm_drvdata *ndd, struct resource *res); struct resource *nvdimm_allocate_dpa(struct nvdimm_drvdata *ndd, diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index d46975ed9e40..90902a142e35 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -203,6 +203,23 @@ static int nd_pmem_probe(struct device *dev) struct nd_namespace_io *nsio = to_nd_namespace_io(dev); struct pmem_device *pmem; + if (resource_size(&nsio->res) < ND_MIN_NAMESPACE_SIZE) { + resource_size_t size = resource_size(&nsio->res); + + dev_dbg(dev, "%s: size: %pa, too small must be at least %#x\n", + __func__, &size, ND_MIN_NAMESPACE_SIZE); + return -ENODEV; + } + + if (nd_region_to_nstype(nd_region) == ND_DEVICE_NAMESPACE_PMEM) { + struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev); + + if (!nspm->uuid) { + dev_dbg(dev, "%s: uuid not set\n", __func__); + return -ENODEV; + } + } + pmem = pmem_alloc(dev, &nsio->res, nd_region->id); if (IS_ERR(pmem)) return PTR_ERR(pmem); @@ -222,13 +239,14 @@ static int nd_pmem_remove(struct device *dev) MODULE_ALIAS("pmem"); MODULE_ALIAS_ND_DEVICE(ND_DEVICE_NAMESPACE_IO); +MODULE_ALIAS_ND_DEVICE(ND_DEVICE_NAMESPACE_PMEM); static struct nd_device_driver nd_pmem_driver = { .probe = nd_pmem_probe, .remove = nd_pmem_remove, .drv = { .name = "nd_pmem", }, - .type = ND_DRIVER_NAMESPACE_IO, + .type = ND_DRIVER_NAMESPACE_IO | ND_DRIVER_NAMESPACE_PMEM, }; static int __init pmem_init(void) diff --git a/drivers/nvdimm/region.c b/drivers/nvdimm/region.c index ade3dba81afd..9aba44e483e0 100644 --- a/drivers/nvdimm/region.c +++ b/drivers/nvdimm/region.c @@ -61,8 +61,11 @@ static int child_unregister(struct device *dev, void *data) static int nd_region_remove(struct device *dev) { + struct nd_region *nd_region = to_nd_region(dev); + /* flush attribute readers and disable */ nvdimm_bus_lock(dev); + nd_region->ns_seed = NULL; dev_set_drvdata(dev, NULL); nvdimm_bus_unlock(dev); diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c index 1571424578f0..b45806f7176d 100644 --- a/drivers/nvdimm/region_devs.c +++ b/drivers/nvdimm/region_devs.c @@ -15,6 +15,7 @@ #include #include #include +#include #include "nd-core.h" #include "nd.h" @@ -99,6 +100,58 @@ int nd_region_to_nstype(struct 
nd_region *nd_region) return 0; } +EXPORT_SYMBOL(nd_region_to_nstype); + +static int is_uuid_busy(struct device *dev, void *data) +{ + struct nd_region *nd_region = to_nd_region(dev->parent); + u8 *uuid = data; + + switch (nd_region_to_nstype(nd_region)) { + case ND_DEVICE_NAMESPACE_PMEM: { + struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev); + + if (!nspm->uuid) + break; + if (memcmp(uuid, nspm->uuid, NSLABEL_UUID_LEN) == 0) + return -EBUSY; + break; + } + case ND_DEVICE_NAMESPACE_BLK: { + /* TODO: blk namespace support */ + break; + } + default: + break; + } + + return 0; +} + +static int is_namespace_uuid_busy(struct device *dev, void *data) +{ + if (is_nd_pmem(dev) || is_nd_blk(dev)) + return device_for_each_child(dev, data, is_uuid_busy); + return 0; +} + +/** + * nd_is_uuid_unique - verify that no other namespace has @uuid + * @dev: any device on a nvdimm_bus + * @uuid: uuid to check + */ +bool nd_is_uuid_unique(struct device *dev, u8 *uuid) +{ + struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev); + + if (!nvdimm_bus) + return false; + WARN_ON_ONCE(!is_nvdimm_bus_locked(&nvdimm_bus->dev)); + if (device_for_each_child(&nvdimm_bus->dev, uuid, + is_namespace_uuid_busy) != 0) + return false; + return true; +} static ssize_t size_show(struct device *dev, struct device_attribute *attr, char *buf) @@ -151,6 +204,60 @@ static ssize_t set_cookie_show(struct device *dev, } static DEVICE_ATTR_RO(set_cookie); +resource_size_t nd_region_available_dpa(struct nd_region *nd_region) +{ + resource_size_t blk_max_overlap = 0, available, overlap; + int i; + + WARN_ON(!is_nvdimm_bus_locked(&nd_region->dev)); + + retry: + available = 0; + overlap = blk_max_overlap; + for (i = 0; i < nd_region->ndr_mappings; i++) { + struct nd_mapping *nd_mapping = &nd_region->mapping[i]; + struct nvdimm_drvdata *ndd = to_ndd(nd_mapping); + + /* if a dimm is disabled the available capacity is zero */ + if (!ndd) + return 0; + + if (is_nd_pmem(&nd_region->dev)) { + available += nd_pmem_available_dpa(nd_region, + nd_mapping, &overlap); + if (overlap > blk_max_overlap) { + blk_max_overlap = overlap; + goto retry; + } + } else if (is_nd_blk(&nd_region->dev)) { + /* TODO: BLK Namespace support */ + } + } + + return available; +} + +static ssize_t available_size_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nd_region *nd_region = to_nd_region(dev); + unsigned long long available = 0; + + /* + * Flush in-flight updates and grab a snapshot of the available + * size. Of course, this value is potentially invalidated the + * memory nvdimm_bus_lock() is dropped, but that's userspace's + * problem to not race itself. 
+ */ + nvdimm_bus_lock(dev); + wait_nvdimm_bus_probe_idle(dev); + available = nd_region_available_dpa(nd_region); + nvdimm_bus_unlock(dev); + + return sprintf(buf, "%llu\n", available); +} +static DEVICE_ATTR_RO(available_size); + static ssize_t init_namespaces_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -168,11 +275,29 @@ static ssize_t init_namespaces_show(struct device *dev, } static DEVICE_ATTR_RO(init_namespaces); +static ssize_t namespace_seed_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nd_region *nd_region = to_nd_region(dev); + ssize_t rc; + + nvdimm_bus_lock(dev); + if (nd_region->ns_seed) + rc = sprintf(buf, "%s\n", dev_name(nd_region->ns_seed)); + else + rc = sprintf(buf, "\n"); + nvdimm_bus_unlock(dev); + return rc; +} +static DEVICE_ATTR_RO(namespace_seed); + static struct attribute *nd_region_attributes[] = { &dev_attr_size.attr, &dev_attr_nstype.attr, &dev_attr_mappings.attr, &dev_attr_set_cookie.attr, + &dev_attr_available_size.attr, + &dev_attr_namespace_seed.attr, &dev_attr_init_namespaces.attr, NULL, }; @@ -182,12 +307,18 @@ static umode_t region_visible(struct kobject *kobj, struct attribute *a, int n) struct device *dev = container_of(kobj, typeof(*dev), kobj); struct nd_region *nd_region = to_nd_region(dev); struct nd_interleave_set *nd_set = nd_region->nd_set; + int type = nd_region_to_nstype(nd_region); - if (a != &dev_attr_set_cookie.attr) + if (a != &dev_attr_set_cookie.attr + && a != &dev_attr_available_size.attr) return a->mode; - if (is_nd_pmem(dev) && nd_set) - return a->mode; + if ((type == ND_DEVICE_NAMESPACE_PMEM + || type == ND_DEVICE_NAMESPACE_BLK) + && a == &dev_attr_available_size.attr) + return a->mode; + else if (is_nd_pmem(dev) && nd_set) + return a->mode; return 0; } @@ -198,6 +329,15 @@ struct attribute_group nd_region_attribute_group = { }; EXPORT_SYMBOL_GPL(nd_region_attribute_group); +u64 nd_region_interleave_set_cookie(struct nd_region *nd_region) +{ + struct nd_interleave_set *nd_set = nd_region->nd_set; + + if (nd_set) + return nd_set->cookie; + return 0; +} + /* * Upon successful probe/remove, take/release a reference on the * associated interleave set (if present) @@ -205,18 +345,20 @@ EXPORT_SYMBOL_GPL(nd_region_attribute_group); static void nd_region_notify_driver_action(struct nvdimm_bus *nvdimm_bus, struct device *dev, bool probe) { - if (is_nd_pmem(dev) || is_nd_blk(dev)) { + if (!probe && (is_nd_pmem(dev) || is_nd_blk(dev))) { struct nd_region *nd_region = to_nd_region(dev); int i; for (i = 0; i < nd_region->ndr_mappings; i++) { struct nd_mapping *nd_mapping = &nd_region->mapping[i]; + struct nvdimm_drvdata *ndd = nd_mapping->ndd; struct nvdimm *nvdimm = nd_mapping->nvdimm; - if (probe) - atomic_inc(&nvdimm->busy); - else - atomic_dec(&nvdimm->busy); + kfree(nd_mapping->labels); + nd_mapping->labels = NULL; + put_ndd(ndd); + nd_mapping->ndd = NULL; + atomic_dec(&nvdimm->busy); } } } diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h index 1b627b109360..c130972e08c4 100644 --- a/include/linux/libnvdimm.h +++ b/include/linux/libnvdimm.h @@ -41,10 +41,20 @@ typedef int (*ndctl_fn)(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, unsigned int cmd, void *buf, unsigned int buf_len); +struct nd_namespace_label; +struct nvdimm_drvdata; struct nd_mapping { struct nvdimm *nvdimm; + struct nd_namespace_label **labels; u64 start; u64 size; + /* + * @ndd is for private use at region enable / disable time for + * get_ndd() + put_ndd(), all other nd_mapping 
to ndd + * conversions use to_ndd() which respects enabled state of the + * nvdimm. + */ + struct nvdimm_drvdata *ndd; }; struct nvdimm_bus_descriptor { diff --git a/include/linux/nd.h b/include/linux/nd.h index da70e9962197..255c38a83083 100644 --- a/include/linux/nd.h +++ b/include/linux/nd.h @@ -28,16 +28,40 @@ static inline struct nd_device_driver *to_nd_device_driver( return container_of(drv, struct nd_device_driver, drv); }; +/** + * struct nd_namespace_io - infrastructure for loading an nd_pmem instance + * @dev: namespace device created by the nd region driver + * @res: struct resource conversion of a NFIT SPA table + */ struct nd_namespace_io { struct device dev; struct resource res; }; +/** + * struct nd_namespace_pmem - namespace device for dimm-backed interleaved memory + * @nsio: device and system physical address range to drive + * @alt_name: namespace name supplied in the dimm label + * @uuid: namespace name supplied in the dimm label + */ +struct nd_namespace_pmem { + struct nd_namespace_io nsio; + char *alt_name; + u8 *uuid; +}; + static inline struct nd_namespace_io *to_nd_namespace_io(struct device *dev) { return container_of(dev, struct nd_namespace_io, dev); } +static inline struct nd_namespace_pmem *to_nd_namespace_pmem(struct device *dev) +{ + struct nd_namespace_io *nsio = to_nd_namespace_io(dev); + + return container_of(nsio, struct nd_namespace_pmem, nsio); +} + #define MODULE_ALIAS_ND_DEVICE(type) \ MODULE_ALIAS("nd:t" __stringify(type) "*") #define ND_DEVICE_MODALIAS_FMT "nd:t%d" diff --git a/include/uapi/linux/ndctl.h b/include/uapi/linux/ndctl.h index 1357a87b8714..2b94ea2287bb 100644 --- a/include/uapi/linux/ndctl.h +++ b/include/uapi/linux/ndctl.h @@ -190,4 +190,8 @@ enum nd_driver_flags { ND_DRIVER_NAMESPACE_PMEM = 1 << ND_DEVICE_NAMESPACE_PMEM, ND_DRIVER_NAMESPACE_BLK = 1 << ND_DEVICE_NAMESPACE_BLK, }; + +enum { + ND_MIN_NAMESPACE_SIZE = 0x00400000, +}; #endif /* __NDCTL_H__ */ -- cgit v1.2.3 From 5532fba2061a203fa05dab87ae3f4213cd52bb8e Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Fri, 26 Jun 2015 10:03:14 -0400 Subject: dm: bump the ioctl version to 4.32.0 This fix enables userspace to detect that the dm-stats changes from the 4.2 merge are in place. Reported-by: Alasdair G Kergon Signed-off-by: Mike Snitzer --- include/uapi/linux/dm-ioctl.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/dm-ioctl.h b/include/uapi/linux/dm-ioctl.h index eac8c3641f39..061aca3a962d 100644 --- a/include/uapi/linux/dm-ioctl.h +++ b/include/uapi/linux/dm-ioctl.h @@ -267,9 +267,9 @@ enum { #define DM_DEV_SET_GEOMETRY _IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl) #define DM_VERSION_MAJOR 4 -#define DM_VERSION_MINOR 31 +#define DM_VERSION_MINOR 32 #define DM_VERSION_PATCHLEVEL 0 -#define DM_VERSION_EXTRA "-ioctl (2015-3-12)" +#define DM_VERSION_EXTRA "-ioctl (2015-6-26)" /* Status bits */ #define DM_READONLY_FLAG (1 << 0) /* In/Out */ -- cgit v1.2.3